diff mbox series

[v3,4/4,NOT,FOR,MERGE] tests/qtest/migration: Adapt tests to use older QEMUs

Message ID 20240105180449.11562-5-farosas@suse.de
State New
Headers show
Series migration & CI: Add a CI job for migration compat testing | expand

Commit Message

Fabiano Rosas Jan. 5, 2024, 6:04 p.m. UTC
[This patch is not necessary anymore after 8.2 has been released]

Add the 'since' annotations to recently added tests and adapt the
postcopy test to use the older "uri" API when needed.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 tests/qtest/migration-test.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

Comments

Peter Xu Jan. 8, 2024, 8:15 a.m. UTC | #1
On Fri, Jan 05, 2024 at 03:04:49PM -0300, Fabiano Rosas wrote:
> [This patch is not necessary anymore after 8.2 has been released]
> 
> Add the 'since' annotations to recently added tests and adapt the
> postcopy test to use the older "uri" API when needed.
> 
> Signed-off-by: Fabiano Rosas <farosas@suse.de>

You marked this as not-for-merge.  Would something like this still be
useful in the future?  IIUC it's a matter of whether we'd still want to
test those old binaries.

> ---
>  tests/qtest/migration-test.c | 34 +++++++++++++++++++++++++++-------
>  1 file changed, 27 insertions(+), 7 deletions(-)
> 
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 001470238b..599f51f978 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -1338,14 +1338,21 @@ static int migrate_postcopy_prepare(QTestState **from_ptr,
>      migrate_ensure_non_converge(from);
>  
>      migrate_prepare_for_dirty_mem(from);
> -    qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
> -                             "  'arguments': { "
> -                             "      'channels': [ { 'channel-type': 'main',"
> -                             "      'addr': { 'transport': 'socket',"
> -                             "                'type': 'inet',"
> -                             "                'host': '127.0.0.1',"
> -                             "                'port': '0' } } ] } }");
>  
> +    /* New syntax was introduced in 8.2 */
> +    if (migration_vercmp(to, "8.2") < 0) {
> +        qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
> +                                 "  'arguments': { "
> +                                 "      'uri': 'tcp:127.0.0.1:0' } }");
> +    } else {
> +        qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
> +                                 "  'arguments': { "
> +                                 "      'channels': [ { 'channel-type': 'main',"
> +                                 "      'addr': { 'transport': 'socket',"
> +                                 "                'type': 'inet',"
> +                                 "                'host': '127.0.0.1',"
> +                                 "                'port': '0' } } ] } }");
> +    }
>      /* Wait for the first serial output from the source */
>      wait_for_serial("src_serial");
>  
> @@ -1603,6 +1610,9 @@ static void test_postcopy_recovery_double_fail(void)
>  {
>      MigrateCommon args = {
>          .postcopy_recovery_test_fail = true,
> +        .start = {
> +            .since = "8.2",
> +        },
>      };
>  
>      test_postcopy_recovery_common(&args);
> @@ -1665,6 +1675,7 @@ static void test_analyze_script(void)
>  {
>      MigrateStart args = {
>          .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
> +        .since = "8.2",
>      };
>      QTestState *from, *to;
>      g_autofree char *uri = NULL;
> @@ -2090,6 +2101,9 @@ static void test_precopy_file(void)
>      MigrateCommon args = {
>          .connect_uri = uri,
>          .listen_uri = "defer",
> +        .start = {
> +            .since = "8.2"
> +        },
>      };
>  
>      test_file_common(&args, true);
> @@ -2134,6 +2148,9 @@ static void test_precopy_file_offset(void)
>          .connect_uri = uri,
>          .listen_uri = "defer",
>          .finish_hook = file_offset_finish_hook,
> +        .start = {
> +            .since = "8.2"
> +        },
>      };
>  
>      test_file_common(&args, false);
> @@ -2148,6 +2165,9 @@ static void test_precopy_file_offset_bad(void)
>          .connect_uri = uri,
>          .listen_uri = "defer",
>          .result = MIG_TEST_QMP_ERROR,
> +        .start = {
> +            .since = "8.2"
> +        },
>      };
>  
>      test_file_common(&args, false);
> -- 
> 2.35.3
>
Fabiano Rosas Jan. 8, 2024, 3:37 p.m. UTC | #2
Peter Xu <peterx@redhat.com> writes:

> On Fri, Jan 05, 2024 at 03:04:49PM -0300, Fabiano Rosas wrote:
>> [This patch is not necessary anymore after 8.2 has been released]
>> 
>> Add the 'since' annotations to recently added tests and adapt the
>> postcopy test to use the older "uri" API when needed.
>> 
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>
> You marked this as not-for-merge.  Would something like this still be
> useful in the future?  IIUC it's a matter of whether we'd still want to
> test those old binaries.
>

Technically yes, but I fail to see what benefit testing old binaries
would bring us. I'm thinking maybe it could be useful for bisecting
compatibility issues, but I can't think of a scenario where we'd like to
change the older QEMU instead of the newer.

I'm of course open to suggestions if you or anyone else has a use case
that you'd like to keep viable.

So far, my idea is that once a new QEMU is released, all the "since:"
annotations become obsolete. We could even remove them. This series is
just infrastructure to make our life easier if a change is ever
introduced that is incompatible with the n-1 QEMU. IMO we cannot have
compatibility testing if a random change might break a test and make it
more difficult to run the remaining tests. So we'd use 'since' or the
vercmp function to skip/adapt the offending tests until the next QEMU is
released.

I'm basing myself on this loosely worded support statement from our
docs:

  "In general QEMU tries to maintain forward migration compatibility
  (i.e. migrating from QEMU n->n+1) and there are users who benefit from
  backward compatibility as well."
Peter Xu Jan. 9, 2024, 3:51 a.m. UTC | #3
On Mon, Jan 08, 2024 at 12:37:46PM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Fri, Jan 05, 2024 at 03:04:49PM -0300, Fabiano Rosas wrote:
> >> [This patch is not necessary anymore after 8.2 has been released]
> >> 
> >> Add the 'since' annotations to recently added tests and adapt the
> >> postcopy test to use the older "uri" API when needed.
> >> 
> >> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> >
> > You marked this as not-for-merge.  Would something like this still be
> > useful in the future?  IIUC it's a matter of whether we'd still want to
> > test those old binaries.
> >
> 
> Technically yes, but I fail to see what benefit testing old binaries
> would bring us. I'm thinking maybe it could be useful for bisecting
> compatibility issues, but I can't think of a scenario where we'd like to
> change the older QEMU instead of the newer.
> 
> I'm of course open to suggestions if you or anyone else has an use case
> that you'd like to keep viable.
> 
> So far, my idea is that once a new QEMU is released, all the "since:"
> annotations become obsolete. We could even remove them. This series is
> just infrastructure to make our life easier if a change is ever
> introduced that is incompatible with the n-1 QEMU. IMO we cannot have
> compatibility testing if a random change might break a test and make it
> more difficult to run the remaining tests. So we'd use 'since' or the
> vercmp function to skip/adapt the offending tests until the next QEMU is
> released.
> 
> I'm basing myself on this loosely worded support statement from our
> docs:
> 
>   "In general QEMU tries to maintain forward migration compatibility
>   (i.e. migrating from QEMU n->n+1) and there are users who benefit from
>   backward compatibility as well."

I think we could still have users migrating from e.g. 8.0 -> 9.0 as long as
they keep the same machine type, especially when upgrading the upper-level
stack (e.g. an OpenStack cluster upgrade), which IIUC can jump a few QEMU major
versions.  That does sound like a common use case, and I suspect the doc
was only taking one example on why compatibility needs to be maintained,
rather than emphasizing "+1 only".

However, then the question is whether those old binaries need to be
covered.

Then I noticed that taking all these "since: XXX" and cmdline changes along
with migration-test may be yet another burden even if we want to cover old
binaries for whatever reason.  I am now more convinced myself that we
should try to get rid of as much burden as we can for migration, because we
already have enough, and it's not ideal to keep growing that unnecessarily.

One good thing with CI in this case (I still don't have enough knowledge on
CI, so I am hoping some CI people can review that patch, though) is that if
we can always guarantee n-1 -> n works for the test cases we enabled, it
most probably means when n boosts again to n+1, we keep making sure n ->
n+1 works perfectly, then n-1 -> n+1 should not fail either, considering
that we're testing the stream protocol matching each other.  There might be
outliers (especially if not described with VMSDs) but should be corner
cases.

So I tend to agree with you on that we drop this patch, keep it simple
until we're much more clear what we can get from that.

But then if so - do we need "since" at all to be expressed in versions?

Basically we keep qtest always being valid only on the latest qemu binary as
before (which actually works the same as Linux vs. kselftests, which makes
sense), there's one exception now with "n-1" due to the CI we plan to add.
Dropping this patch means we don't yet plan to support n-2.  Then maybe
instead of a "since" we only need a boolean showing "whether one test needs
to be covered by a cross-binary test"?  Then we set it in incompatible
binaries (skip all cross-binary tests directly, rather than relying on any
qemu versions, no compare needed), and can also drop that when a new
release starts.

Thanks,
Fabiano Rosas Jan. 9, 2024, 2:46 p.m. UTC | #4
Peter Xu <peterx@redhat.com> writes:

> On Mon, Jan 08, 2024 at 12:37:46PM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>> 
>> > On Fri, Jan 05, 2024 at 03:04:49PM -0300, Fabiano Rosas wrote:
>> >> [This patch is not necessary anymore after 8.2 has been released]
>> >> 
>> >> Add the 'since' annotations to recently added tests and adapt the
>> >> postcopy test to use the older "uri" API when needed.
>> >> 
>> >> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> >
>> > You marked this as not-for-merge.  Would something like this still be
>> > useful in the future?  IIUC it's a matter of whether we'd still want to
>> > test those old binaries.
>> >
>> 
>> Technically yes, but I fail to see what benefit testing old binaries
>> would bring us. I'm thinking maybe it could be useful for bisecting
>> compatibility issues, but I can't think of a scenario where we'd like to
>> change the older QEMU instead of the newer.
>> 
>> I'm of course open to suggestions if you or anyone else has an use case
>> that you'd like to keep viable.
>> 
>> So far, my idea is that once a new QEMU is released, all the "since:"
>> annotations become obsolete. We could even remove them. This series is
>> just infrastructure to make our life easier if a change is ever
>> introduced that is incompatible with the n-1 QEMU. IMO we cannot have
>> compatibility testing if a random change might break a test and make it
>> more difficult to run the remaining tests. So we'd use 'since' or the
>> vercmp function to skip/adapt the offending tests until the next QEMU is
>> released.
>> 
>> I'm basing myself on this loosely worded support statement from our
>> docs:
>> 
>>   "In general QEMU tries to maintain forward migration compatibility
>>   (i.e. migrating from QEMU n->n+1) and there are users who benefit from
>>   backward compatibility as well."
>
> I think we could still have users migrating from e.g. 8.0 -> 9.0 as long as
> with the same machine type, especially when upgrading upper level stack
> (e.g. an openstack cluster upgrade), where IIUC can jump a few qemu major
> versions.  That does sound like a common use case, and I suspect the doc
> was only taking one example on why compatibility needs to be maintained,
> rather than emphasizing "+1 only".

Oh, I would expect people to be migrating in all sorts of ways. But we
need to think in terms of what upstream QEMU supports so we can guide
the development. And hopefully have a test for everything we actually
support and everyone that touches migration code having the same view on
this.

I can barely think about n->n+1 to be honest, that's why I was writing
this compatibility test even before Juan asked for it.

You raise a good point about a cloud provider or distro jumping major
versions. That's a tricky situation. Because then their support
statement would potentially cover something that's completely different
from what we're testing upstream.

> However then the question is whether those old binaries needs to be
> convered.
>
> Then I noticed that taking all these "since: XXX" and cmdline changes along
> with migration-test may be yet another burden even if we want to cover old
> binaries for whatever reason.  I am now more convinced myself that we
> should try to get rid of as much burden as we can for migration, because we
> already have enough, and it's not ideal to keep growing that unnecessarily.
>
> One good thing with CI in this case (I still don't have enough knowledge on
> CI, so I am hoping some CI people can review that patch, though) is that if
> we can always guarantee n-1 -> n works for the test cases we enabled, it
> most probably means when n boosts again to n+1, we keep making sure n ->
> n+1 works perfectly, then n-1 -> n+1 should not fail either, considering
> that we're testing the stream protocol matching each other.  There might be
> outliers (especially if not described with VMSDs) but should be corner
> cases.

I agree that the transitivity should be preserved. If we could override
the QEMU_PREV_VERSION variable in the CI script, that would be an easy
way of running a sanity check every once in a while.

> So I tend to agree with you on that we drop this patch, keep it simple
> until we're much more clear what we can get from that.
>
> But then if so - do we need "since" at all to be expressed in versions?

I agree that we don't need "since" semantics.

> Basically we keep qtest always be valid only on the latest qemu binary as
> before (which actually works the same as Linux v.s. kselftests, which makes
> sense), there's one exception now with "n-1" due to the CI we plan to add.
> Dropping this patch means we don't yet plan to support n-2.  Then maybe
> instead of a "since" we only need a boolean showing "whether one test needs
> to be covered by a cross-binary test"?  Then we set it in incompatible
> binaries (skip all cross-binary tests directly, rather than relying on any
> qemu versions, no compare needed), and can also drop that when a new
> release starts.

Hm, it would be better to avoid the extra maintenance task at the start
of every release, no? It also blocks us from doing n-2 even
experimentally.
Peter Xu Jan. 10, 2024, 4:08 a.m. UTC | #5
On Tue, Jan 09, 2024 at 11:46:32AM -0300, Fabiano Rosas wrote:
> Hm, it would be better to avoid the extra maintenance task at the start
> of every release, no? It also blocks us from doing n-2 even
> experimentally.

See my other reply, on whether we can use "n-1" for migration-test.  If
that can work for us, then IIUC we can avoid either "since:" or any
relevant flag, nor do we need to unmask tests after each release.  All
old tests should always "just work" with a new qemu binary.

One drawback I can think of is, new tests (even if applicable to old qemu
binaries) will not start to take effect in the cross-binary test until the
next release, but that's not so bad I assume.

Since the QTEST_QEMU_BINARY_SRC|DST function is already merged in 8.2, I
think we can already start kicking them and enable them for 9.0 if it works.
Fabiano Rosas Jan. 10, 2024, 2:42 p.m. UTC | #6
Peter Xu <peterx@redhat.com> writes:

> On Tue, Jan 09, 2024 at 11:46:32AM -0300, Fabiano Rosas wrote:
>> Hm, it would be better to avoid the extra maintenance task at the start
>> of every release, no? It also blocks us from doing n-2 even
>> experimentally.
>
> See my other reply, on whether we can use "n-1" for migration-test.  If
> that can work for us, then IIUC we can avoid either "since:" or any
> relevant flag, neither do we need to unmask tests after each releases.  All
> old tests should always "just work" with a new qemu binary.

Hmm.. There are some assumptions here:

1) New code will always be compatible with old tests. E.g. some
   patchseries changed code and changed a test to match the new
   code. Then we'd need a flag like 'since' anyway to mark that the new
   QEMU cannot be used with the old test.

   (if new QEMU is not compatible with old tests without any good
   reason, then that's just a regression I think)

2) There would not be issues when fixing bugs/refactoring
   tests. E.g. old tests had a bug that is now fixed, but since we're
   not using the new tests, the bug is always there until next
   release. This could block the entire test suite, especially with
   concurrency bugs which can start triggering due to changes in timing.

3) New code that can only be reached via new tests cannot cause
   regressions. E.g. new code is added but is kept under a machine
   property or migration capability. That code will only show the
   regression after the new test enables that cap/property. At that
   point it's too late because it was already released.

In general I like the simplicity of your approach, but it would be
annoying to change this series only to find out we still need some sort
of flag later. Even worse, #3 would miss the point of this kind of
testing entirely.

#1 could be mitigated by a "no changes to tests rule". We'd start
requiring that new tests be written and an existing test is never
altered. For #2 and #3 I don't have a solution.
Peter Xu Jan. 11, 2024, 2:35 a.m. UTC | #7
On Wed, Jan 10, 2024 at 11:42:18AM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Tue, Jan 09, 2024 at 11:46:32AM -0300, Fabiano Rosas wrote:
> >> Hm, it would be better to avoid the extra maintenance task at the start
> >> of every release, no? It also blocks us from doing n-2 even
> >> experimentally.
> >
> > See my other reply, on whether we can use "n-1" for migration-test.  If
> > that can work for us, then IIUC we can avoid either "since:" or any
> > relevant flag, neither do we need to unmask tests after each releases.  All
> > old tests should always "just work" with a new qemu binary.
> 
> Hmm.. There are some assumptions here:
> 
> 1) New code will always be compatible with old tests. E.g. some
>    patchseries changed code and changed a test to match the new
>    code. Then we'd need a flag like 'since' anyway to mark that the new
>    QEMU cannot be used with the old test.
> 
>    (if new QEMU is not compatible with old tests without any good
>    reason, then that's just a regression I think)

Exactly what you are saying here.  We can't make new QEMU not working on
old tests.

One way to simplify the understanding is, we can imagine the old tests as
"some user currently using the old QEMU, and who would like to migrate to
the master QEMU binary".  Such user only uses exactly the same cmdline we
used for testing migration-test in exactly that n-1 qemu release binary.

If we fail that old test, it means we would already be failing such a user.
That's definitely a regression to me, no?  Or, do you have a solid example?

The only thing I can think of is, when we want to e.g. obsolete a QEMU
cmdline that is used in migration-test.  But then that cmdline needs to be
declared obsolete first for a few releases (let's say, 4), and before that
deadline we should already rewrite migration-test to not use it, and as
long as we do it in 3 releases I suppose nothing will be affected.

> 
> 2) There would not be issues when fixing bugs/refactoring
>    tests. E.g. old tests had a bug that is now fixed, but since we're
>    not using the new tests, the bug is always there until next
>    release. This could block the entire test suite, specially with
>    concurrency bugs which can start triggering due to changes in timing.

Yes this might be a problem.  Note that the old tests we're using will be
exactly the same test we released previous QEMU.  I am "assuming" that the
test case is as stable as the released QEMU, since we kept running it for
all pulls in CI runs.  If we see anything flaky, we should mark it
especially right before the release, then the released tests will be
considerably stable.

The worst case is we still keep a knob in the CI file, and we can turn off
n-1 -> n tests for the CI for some release if there's some unfortunate
accident.  But I hope in reality that can be avoided.

> 
> 3) New code that can only be reached via new tests cannot cause
>    regressions. E.g. new code is added but is kept under a machine
>    property or migration capability. That code will only show the
>    regression after the new test enables that cap/property. At that
>    point it's too late because it was already released.

I can't say I fully get the point here.  New code, if with a new cap with
it, should run exactly like the old code if the cap is not turned on.  I
suppose that's the case for when we only run n-1 version of migration-test.
IMHO it's the same issue as 1) above, that we just should not break it, and
if we do, that's exactly what we want to capture and fix in master, not n-1
branch.

But as I said, perhaps I didn't really get the issue you wanted to describe..

> 
> In general I like the simplicity of your approach, but it would be
> annoying to change this series only to find out we still need some sort
> of flag later. Even worse, #3 would miss the point of this kind of
> testing entirely.
> 
> #1 could be mitigated by a "no changes to tests rule". We'd start
> requiring that new tests be written and an existing test is never
> altered. For #2 and #3 I don't have a solution.
>
Fabiano Rosas Jan. 11, 2024, 1:58 p.m. UTC | #8
Peter Xu <peterx@redhat.com> writes:

> On Wed, Jan 10, 2024 at 11:42:18AM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>> 
>> > On Tue, Jan 09, 2024 at 11:46:32AM -0300, Fabiano Rosas wrote:
>> >> Hm, it would be better to avoid the extra maintenance task at the start
>> >> of every release, no? It also blocks us from doing n-2 even
>> >> experimentally.
>> >
>> > See my other reply, on whether we can use "n-1" for migration-test.  If
>> > that can work for us, then IIUC we can avoid either "since:" or any
>> > relevant flag, neither do we need to unmask tests after each releases.  All
>> > old tests should always "just work" with a new qemu binary.
>> 
>> Hmm.. There are some assumptions here:
>> 
>> 1) New code will always be compatible with old tests. E.g. some
>>    patchseries changed code and changed a test to match the new
>>    code. Then we'd need a flag like 'since' anyway to mark that the new
>>    QEMU cannot be used with the old test.
>> 
>>    (if new QEMU is not compatible with old tests without any good
>>    reason, then that's just a regression I think)
>
> Exactly what you are saying here.  We can't make new QEMU not working on
> old tests.

Ok, so we need to forbid breaking changes to tests from now on. I'll try
to add some words in the docs about this.

>
> One way to simplify the understanding is, we can imagine the old tests as
> "some user currently using the old QEMU, and who would like to migrate to
> the master QEMU binary".  Such user only uses exactly the same cmdline we
> used for testing migration-test in exactly that n-1 qemu release binary.
>
> If we fail that old test, it means we can already fail such an user.
> That's destined a regression to me, no?  Or, do you have a solid example?

For instance, we used to not issue the SETUP event on incoming. If a
test (or user app) expected to see the ACTIVE or FAILED states, then
would it be a regression to now start issuing the SETUP event at the
proper place?

Anyway, it's pointless to give examples because we either allow old
tests to be changed or we don't. If we don't then that's solved. If we
do, we'll always have space for the situation I mentioned in 1) above.

> The only thing I can think of is, when we want to e.g. obsolete a QEMU
> cmdline that is used in migration-test.  But then that cmdline needs to be
> declared obsolete first for a few releases (let's say, 4), and before that
> deadline we should already rewrite migration-test to not use it, and as
> long as we do it in 3 releases I suppose nothing will be affected.
>
>> 
>> 2) There would not be issues when fixing bugs/refactoring
>>    tests. E.g. old tests had a bug that is now fixed, but since we're
>>    not using the new tests, the bug is always there until next
>>    release. This could block the entire test suite, specially with
>>    concurrency bugs which can start triggering due to changes in timing.
>
> Yes this might be a problem.  Note that the old tests we're using will be
> exactly the same test we released previous QEMU.  I am "assuming" that the
> test case is as stable as the released QEMU, since we kept running it for
> all pulls in CI runs.  If we see anything flaky, we should mark it
> especially right before the release, then the released tests will be
> considerably stable.

It's not just the test case. The whole test infrastructure could change
entirely. But let's maybe cross that bridge when we get to it.

>
> The worst case is we still keep a knob in the CI file, and we can turn off
> n-1 -> n tests for the CI for some release if there's some unfortunate
> accident. But I hope in reality that can be avoided.
>
>> 
>> 3) New code that can only be reached via new tests cannot cause
>>    regressions. E.g. new code is added but is kept under a machine
>>    property or migration capability. That code will only show the
>>    regression after the new test enables that cap/property. At that
>>    point it's too late because it was already released.
>
> I can't say I fully get the point here.  New code, if with a new cap with
> it, should run exactly like the old code if the cap is not turned on.  I
> suppose that's the case for when we only run n-1 version of migration-test.
> IMHO it's the same issue as 1) above, that we just should not break it, and
> if we do, that's exactly what we want to capture and fix in master, not n-1
> branch.
>
> But as I said, perhaps I didn't really get the issue you wanted to describe..

if (cap_foo()) {
   <do something bad>
}

This^ only executes once we have a test that enables cap_foo. If the
"something bad" is something that breaks compatibility, then we'll miss
it when using n-1 migration-test.

Now that I think about it, should we parameterize the CI so we can
actually switch between old migration-tests and new migration-tests? So
we make the default what you suggest, but still have the ability to
trigger a job every once in a while that uses the new tests.
Peter Xu Jan. 15, 2024, 4:13 a.m. UTC | #9
On Thu, Jan 11, 2024 at 10:58:49AM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Wed, Jan 10, 2024 at 11:42:18AM -0300, Fabiano Rosas wrote:
> >> Peter Xu <peterx@redhat.com> writes:
> >> 
> >> > On Tue, Jan 09, 2024 at 11:46:32AM -0300, Fabiano Rosas wrote:
> >> >> Hm, it would be better to avoid the extra maintenance task at the start
> >> >> of every release, no? It also blocks us from doing n-2 even
> >> >> experimentally.
> >> >
> >> > See my other reply, on whether we can use "n-1" for migration-test.  If
> >> > that can work for us, then IIUC we can avoid either "since:" or any
> >> > relevant flag, neither do we need to unmask tests after each releases.  All
> >> > old tests should always "just work" with a new qemu binary.
> >> 
> >> Hmm.. There are some assumptions here:
> >> 
> >> 1) New code will always be compatible with old tests. E.g. some
> >>    patchseries changed code and changed a test to match the new
> >>    code. Then we'd need a flag like 'since' anyway to mark that the new
> >>    QEMU cannot be used with the old test.
> >> 
> >>    (if new QEMU is not compatible with old tests without any good
> >>    reason, then that's just a regression I think)
> >
> > Exactly what you are saying here.  We can't make new QEMU not working on
> > old tests.
> 
> Ok, so we need to forbid breaking changes to tests from now on. I'll try
> to add some words in the docs about this.
> 
> >
> > One way to simplify the understanding is, we can imagine the old tests as
> > "some user currently using the old QEMU, and who would like to migrate to
> > the master QEMU binary".  Such user only uses exactly the same cmdline we
> > used for testing migration-test in exactly that n-1 qemu release binary.
> >
> > If we fail that old test, it means we can already fail such an user.
> > That's destined a regression to me, no?  Or, do you have a solid example?
> 
> For instance, we used to not issue the SETUP event on incoming. If a
> test (or user app) expected to see the ACTIVE or FAILED states, then
> would it be a regression to now start issuing the SETUP event at the
> proper place?

Valid example.  And it's a tricky example in that it actually breaks the
ABI even though slightly, however events are just normally more flexible in
this case, so we didn't care.

I think it means we didn't care about any program expecting no SETUP before
ACTIVE, or such a user would already have crashed.

Our migration-test is compatible with such change, right?

I think the trick here is we shouldn't make migration-test ever contain
any "assumption" about the internals of QEMU.  It should only behave strictly
the way a user can use QEMU, and that should always be guaranteed to work
on newer qemu binaries.  Then breaking old migration-test will be the same
as breaking an user, and it'll naturally fit in this model too of using n-1
version of migration-test.

> 
> Anyway, it's pointless to give examples because we either allow old
> tests to be changed or we don't. If we don't then that's solved. If we
> do, we'll always have space for the situation I mentioned in 1) above.

IMHO we should allow any changes to old tests.  It won't apply to n-1
test anyway, not until the next release.  It may depend on how you define
"changed" in this case.

> 
> > The only thing I can think of is, when we want to e.g. obsolete a QEMU
> > cmdline that is used in migration-test.  But then that cmdline needs to be
> > declared obsolete first for a few releases (let's say, 4), and before that
> > deadline we should already rewrite migration-test to not use it, and as
> > long as we do it in 3 releases I suppose nothing will be affected.
> >
> >> 
> >> 2) There would not be issues when fixing bugs/refactoring
> >>    tests. E.g. old tests had a bug that is now fixed, but since we're
> >>    not using the new tests, the bug is always there until next
> >>    release. This could block the entire test suite, specially with
> >>    concurrency bugs which can start triggering due to changes in timing.
> >
> > Yes this might be a problem.  Note that the old tests we're using will be
> > exactly the same test we released previous QEMU.  I am "assuming" that the
> > test case is as stable as the released QEMU, since we kept running it for
> > all pulls in CI runs.  If we see anything flaky, we should mark it
> > especially right before the release, then the released tests will be
> > considerably stable.
> 
> It's not just the test case. The whole test infrastructure could change
> entirely. But let's maybe cross that bridge when we get to it.
> 
> >
> > The worst case is we still keep a knob in the CI file, and we can turn off
> > n-1 -> n tests for the CI for some release if there's some unfortunate
> > accident. But I hope in reality that can be avoided.
> >
> >> 
> >> 3) New code that can only be reached via new tests cannot cause
> >>    regressions. E.g. new code is added but is kept under a machine
> >>    property or migration capability. That code will only show the
> >>    regression after the new test enables that cap/property. At that
> >>    point it's too late because it was already released.
> >
> > I can't say I fully get the point here.  New code, if with a new cap with
> > it, should run exactly like the old code if the cap is not turned on.  I
> > suppose that's the case for when we only run n-1 version of migration-test.
> > IMHO it's the same issue as 1) above, that we just should not break it, and
> > if we do, that's exactly what we want to capture and fix in master, not n-1
> > branch.
> >
> > But as I said, perhaps I didn't really get the issue you wanted to describe..
> 
> if (cap_foo()) {
>    <do something bad>
> }
> 
> This^ only executes once we have a test that enables cap_foo. If the
> "something bad" is something that breaks compatibility, then we'll miss
> it when using n-1 migration-test.

IMHO the n-1 tests are not for this.  The new FOO cap can only be enabled
in n+ versions anyway, so something like above should be covered by the
normal migration test that anyone would like to propose the new FOO cap.
The n-1 test we're discussing is extra tests on top of that.  So:

  - Same binary test: we (of course) keep running migration-test for
    master, covers FOO

  - Cross binary test: we (hopefully since 9.0?) run n-1 migration-test
    for previous release

Then after n boosts, the new FOO test (that will enable FOO) will become
part of n-1 tests.

> 
> Now that I think about it, should we parameterize the CI so we can
> actually switch between old migration-tests and new migration-tests? So
> we make the default what you suggest, but still have the ability to
> trigger a job every once in a while that uses the new tests.

Certainly. Such a knob will never hurt, I assume.  It's just that I'd
expect new migration-test could constantly fail the cross-binary test as
long as we introduce new features.  Maybe it's a matter of whether we would
like migration-test itself to understand the "version" idea.

What I was saying above is trying to reduce our burden to teach
migration-test to understand any version concept.  So migration-test always
applies only to the master branch (and newer; due to migration's strict
ABI), no need to detect any cap as long as master supports it.
Fabiano Rosas Jan. 15, 2024, 1:45 p.m. UTC | #10
Peter Xu <peterx@redhat.com> writes:

> On Thu, Jan 11, 2024 at 10:58:49AM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>> 
>> > On Wed, Jan 10, 2024 at 11:42:18AM -0300, Fabiano Rosas wrote:
>> >> Peter Xu <peterx@redhat.com> writes:
>> >> 
>> >> > On Tue, Jan 09, 2024 at 11:46:32AM -0300, Fabiano Rosas wrote:
>> >> >> Hm, it would be better to avoid the extra maintenance task at the start
>> >> >> of every release, no? It also blocks us from doing n-2 even
>> >> >> experimentally.
>> >> >
>> >> > See my other reply, on whether we can use "n-1" for migration-test.  If
>> >> > that can work for us, then IIUC we can avoid either "since:" or any
>> >> > relevant flag, neither do we need to unmask tests after each releases.  All
>> >> > old tests should always "just work" with a new qemu binary.
>> >> 
>> >> Hmm.. There are some assumptions here:
>> >> 
>> >> 1) New code will always be compatible with old tests. E.g. some
>> >>    patchseries changed code and changed a test to match the new
>> >>    code. Then we'd need a flag like 'since' anyway to mark that the new
>> >>    QEMU cannot be used with the old test.
>> >> 
>> >>    (if new QEMU is not compatible with old tests without any good
>> >>    reason, then that's just a regression I think)
>> >
>> > Exactly what you are saying here.  We can't make new QEMU not working on
>> > old tests.
>> 
>> Ok, so we need to forbid breaking changes to tests from now on. I'll try
>> to add some words in the docs about this.
>> 
>> >
>> > One way to simplify the understanding is, we can imagine the old tests as
>> > "some user currently using the old QEMU, and who would like to migrate to
>> > the master QEMU binary".  Such user only uses exactly the same cmdline we
>> > used for testing migration-test in exactly that n-1 qemu release binary.
>> >
>> > If we fail that old test, it means we can already fail such an user.
>> > That's destined a regression to me, no?  Or, do you have a solid example?
>> 
>> For instance, we used to not issue the SETUP event on incoming. If a
>> test (or user app) expected to see the ACTIVE or FAILED states, then
>> would it be a regression to now start issuing the SETUP event at the
>> proper place?
>
> Valid example.  And it's a tricky example in that it actually breaks the
> ABI even though slightly, however events are just normally more flexible in
> this case, so we didn't care.
>
> I think it means we didn't care any program expecting no SETUP before
> ACTIVE, or such user already crashes.
>
> Our migration-test is compatible with such change, right?
>
> I think the trick here is we shouldn't make migration-test to ever contain
> any "assumption" of the internals of QEMU.  It should only behave strictly
> as what an user can use QEMU, and that should always be guaranteed to work
> on newer qemu binaries.  Then breaking old migration-test will be the same
> as breaking an user, and it'll naturally fit in this model too of using n-1
> version of migration-test.
>
>> 
>> Anyway, it's pointless to give examples because we either allow old
>> tests to be changed or we don't. If we don't then that's solved. If we
>> do, we'll always have space for the situation I mentioned in 1) above.
>
> IMHO we should allow any changes to old tests, IMHO.  It won't apply to n-1
> test anyway, not until the next release.  It may depend on how you define
> "changed" in this case.
>

I mean introducing a piece of code in QEMU which requires a change in a
test. That shouldn't be allowed, because the n-1 tests will still have
the old behavior, which could potentially clash with what the new QEMU is
doing.

>> 
>> > The only thing I can think of is, when we want to e.g. obsolete a QEMU
>> > cmdline that is used in migration-test.  But then that cmdline needs to be
>> > declared obsolete first for a few releases (let's say, 4), and before that
>> > deadline we should already rewrite migration-test to not use it, and as
>> > long as we do it in 3 releases I suppose nothing will be affected.
>> >
>> >> 
>> >> 2) There would not be issues when fixing bugs/refactoring
>> >>    tests. E.g. old tests had a bug that is now fixed, but since we're
>> >>    not using the new tests, the bug is always there until next
>> >>    release. This could block the entire test suite, specially with
>> >>    concurrency bugs which can start triggering due to changes in timing.
>> >
>> > Yes this might be a problem.  Note that the old tests we're using will be
>> > exactly the same test we released previous QEMU.  I am "assuming" that the
>> > test case is as stable as the released QEMU, since we kept running it for
>> > all pulls in CI runs.  If we see anything flaky, we should mark it
>> > especially right before the release, then the released tests will be
>> > considerably stable.
>> 
>> It's not just the test case. The whole test infrastructure could change
>> entirely. But let's maybe cross that bridge when we get to it.
>> 
>> >
>> > The worst case is we still keep a knob in the CI file, and we can turn off
>> > n-1 -> n tests for the CI for some release if there's some unfortunate
>> > accident. But I hope in reality that can be avoided.
>> >
>> >> 
>> >> 3) New code that can only be reached via new tests cannot cause
>> >>    regressions. E.g. new code is added but is kept under a machine
>> >>    property or migration capability. That code will only show the
>> >>    regression after the new test enables that cap/property. At that
>> >>    point it's too late because it was already released.
>> >
>> > I can't say I fully get the point here.  New code, if with a new cap with
>> > it, should run exactly like the old code if the cap is not turned on.  I
>> > suppose that's the case for when we only run n-1 version of migration-test.
>> > IMHO it's the same issue as 1) above, that we just should not break it, and
>> > if we do, that's exactly what we want to capture and fix in master, not n-1
>> > branch.
>> >
>> > But as I said, perhaps I didn't really get the issue you wanted to describe..
>> 
>> if (cap_foo()) {
>>    <do something bad>
>> }
>> 
>> This^ only executes once we have a test that enables cap_foo. If the
>> "something bad" is something that breaks compatibility, then we'll miss
>> it when using n-1 migration-test.
>
> IMHO the n-1 tests are not for this.  The new FOO cap can only be enabled
> in n+ versions anyway, so something like above should be covered by the
> normal migration test that anyone would like to propose the new FOO cap.

You're being too generous in thinking new code will always restrict
itself to implementing new functionality and never have a bug that
affects a completely different part of the code. There could be an
innocent refactoring along with cap FOO that breaks the migration only
when FOO is enabled.

But fine. We can't predict every scenario. Let's get this series out the
door.

Thanks for the comments so far. I'll spin another version.

> The n-1 test we're discussing is extra tests on top of that.  So:
>
>   - Same binary test: we (of course) keep running migration-test for
>     master, covers FOO
>
>   - Cross binary testA: we (hopefully since 9.0?) runs n-1 migration-test
>     for previous release
>
> Then after n boosts, the new FOO test (that will enable FOO) will become
> part of n-1 tests.
>
>> 
>> Now that I think about it, should we parameterize the CI so we can
>> actually switch between old migration-tests and new migration-tests? So
>> we make the default what you suggest, but still have the ability to
>> trigger a job every once in a while that uses the new tests.
>
> Certainly. Such a knob will never hurt, I assume.  It's just that I'd
> expect new migration-test could constantly fail the cross-binary test as
> long as we introduce new features.  Maybe it's a matter of whether we would
> like migration-test itself to understand the "version" idea.
>
> What I was saying above is trying to reduce our burden to teach
> migration-test to understand any version concept.  So migration-test always
> applies only to the master branch (and newer; due to migration's strict
> ABI), no need to detect any cap as long as master supports it.
Peter Xu Jan. 15, 2024, 11:28 p.m. UTC | #11
On Mon, Jan 15, 2024 at 10:45:33AM -0300, Fabiano Rosas wrote:
> > IMHO the n-1 tests are not for this.  The new FOO cap can only be enabled
> > in n+ versions anyway, so something like above should be covered by the
> > normal migration test that anyone would like to propose the new FOO cap.
> 
> You're being too generous in thinking new code will always restrict
> itself to implementing new functionality and never have a bug that
> affects a completly different part of the code. There could be an
> innocent refactoring along with cap FOO that breaks the migration only
> when FOO is enabled.

The question is: even if we run the cross-binary migration-test with the
current version ("n"), we can't detect such an issue, right?  Because AFAIU
with that we'd need migration-test to always understand QEMU versions, and
it should skip the new test that enables FOO in the cross-binary run, since
it should detect that the old binary doesn't support it.

> 
> But fine. We can't predict every scenario. Let's get this series out the
> door.
> 
> Thanks for the comments so far. I'll spin another version.

Yes, if you think that is a good starting point, we can start simple.
That's so far the only solution I can think of that has mostly zero
maintenance burden for the tests while hopefully starting to cover some
spots for us.  That said, the discussion can keep going no matter what.
diff mbox series

Patch

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 001470238b..599f51f978 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1338,14 +1338,21 @@  static int migrate_postcopy_prepare(QTestState **from_ptr,
     migrate_ensure_non_converge(from);
 
     migrate_prepare_for_dirty_mem(from);
-    qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
-                             "  'arguments': { "
-                             "      'channels': [ { 'channel-type': 'main',"
-                             "      'addr': { 'transport': 'socket',"
-                             "                'type': 'inet',"
-                             "                'host': '127.0.0.1',"
-                             "                'port': '0' } } ] } }");
 
+    /* New syntax was introduced in 8.2 */
+    if (migration_vercmp(to, "8.2") < 0) {
+        qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
+                                 "  'arguments': { "
+                                 "      'uri': 'tcp:127.0.0.1:0' } }");
+    } else {
+        qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
+                                 "  'arguments': { "
+                                 "      'channels': [ { 'channel-type': 'main',"
+                                 "      'addr': { 'transport': 'socket',"
+                                 "                'type': 'inet',"
+                                 "                'host': '127.0.0.1',"
+                                 "                'port': '0' } } ] } }");
+    }
     /* Wait for the first serial output from the source */
     wait_for_serial("src_serial");
 
@@ -1603,6 +1610,9 @@  static void test_postcopy_recovery_double_fail(void)
 {
     MigrateCommon args = {
         .postcopy_recovery_test_fail = true,
+        .start = {
+            .since = "8.2",
+        },
     };
 
     test_postcopy_recovery_common(&args);
@@ -1665,6 +1675,7 @@  static void test_analyze_script(void)
 {
     MigrateStart args = {
         .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
+        .since = "8.2",
     };
     QTestState *from, *to;
     g_autofree char *uri = NULL;
@@ -2090,6 +2101,9 @@  static void test_precopy_file(void)
     MigrateCommon args = {
         .connect_uri = uri,
         .listen_uri = "defer",
+        .start = {
+            .since = "8.2"
+        },
     };
 
     test_file_common(&args, true);
@@ -2134,6 +2148,9 @@  static void test_precopy_file_offset(void)
         .connect_uri = uri,
         .listen_uri = "defer",
         .finish_hook = file_offset_finish_hook,
+        .start = {
+            .since = "8.2"
+        },
     };
 
     test_file_common(&args, false);
@@ -2148,6 +2165,9 @@  static void test_precopy_file_offset_bad(void)
         .connect_uri = uri,
         .listen_uri = "defer",
         .result = MIG_TEST_QMP_ERROR,
+        .start = {
+            .since = "8.2"
+        },
     };
 
     test_file_common(&args, false);