Patchwork migration: remove subsections in fdc and rtl8139 and bump versions

login
register
mail settings
Submitter Anthony Liguori
Date Aug. 2, 2011, 11:08 p.m.
Message ID <1312326516-10117-1-git-send-email-aliguori@us.ibm.com>
Download mbox | patch
Permalink /patch/108007/
State New
Headers show

Comments

Anthony Liguori - Aug. 2, 2011, 11:08 p.m.
As Paolo points out, the migration protocol is ambiguous when using subsections
today.  That means that even if we preserve subsections and change the protocol
accordingly, the old protocol w/subsections is still ambiguous.

Remove subsection usage and bump any device using subsections.  This effectively
eliminates the amiguouity and allows for a clean transition to a new protocol
with unambiguous subsections.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 hw/fdc.c     |   37 ++++++-------------------------------
 hw/rtl8139.c |   34 +++-------------------------------
 2 files changed, 9 insertions(+), 62 deletions(-)
Anthony Liguori - Aug. 2, 2011, 11:17 p.m.
On 08/02/2011 06:08 PM, Anthony Liguori wrote:
> As Paolo points out, the migration protocol is ambiguous when using subsections
> today.  That means that even if we preserve subsections and change the protocol
> accordingly, the old protocol w/subsections is still ambiguous.
>
> Remove subsection usage and bump any device using subsections.  This effectively
> eliminates the amiguouity and allows for a clean transition to a new protocol
> with unambiguous subsections.
>
> Signed-off-by: Anthony Liguori<aliguori@us.ibm.com>

And to clarify, I'm proposing we carry this for stable 0.15 and master.

Regards,

Anthony Liguori

> ---
>   hw/fdc.c     |   37 ++++++-------------------------------
>   hw/rtl8139.c |   34 +++-------------------------------
>   2 files changed, 9 insertions(+), 62 deletions(-)
>
> diff --git a/hw/fdc.c b/hw/fdc.c
> index edf0360..d8d74c9 100644
> --- a/hw/fdc.c
> +++ b/hw/fdc.c
> @@ -554,45 +554,20 @@ static int fdrive_media_changed_post_load(void *opaque, int version_id)
>       return 0;
>   }
>
> -static bool fdrive_media_changed_needed(void *opaque)
> -{
> -    FDrive *drive = opaque;
> -
> -    return (drive->bs != NULL&&  drive->bs->media_changed != 1);
> -}
> -
> -static const VMStateDescription vmstate_fdrive_media_changed = {
> -    .name = "fdrive/media_changed",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> -    .minimum_version_id_old = 1,
> -    .pre_save = fdrive_media_changed_pre_save,
> -    .post_load = fdrive_media_changed_post_load,
> -    .fields      = (VMStateField[]) {
> -        VMSTATE_UINT8(media_changed, FDrive),
> -        VMSTATE_END_OF_LIST()
> -    }
> -};
> -
>   static const VMStateDescription vmstate_fdrive = {
>       .name = "fdrive",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> -    .minimum_version_id_old = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 2,
> +    .minimum_version_id_old = 2,
> +    .pre_save = fdrive_media_changed_pre_save,
> +    .post_load = fdrive_media_changed_post_load,
>       .fields      = (VMStateField[]) {
>           VMSTATE_UINT8(head, FDrive),
>           VMSTATE_UINT8(track, FDrive),
>           VMSTATE_UINT8(sect, FDrive),
> +        VMSTATE_UINT8(media_changed, FDrive),
>           VMSTATE_END_OF_LIST()
>       },
> -    .subsections = (VMStateSubsection[]) {
> -        {
> -            .vmsd =&vmstate_fdrive_media_changed,
> -            .needed =&fdrive_media_changed_needed,
> -        } , {
> -            /* empty */
> -        }
> -    }
>   };
>
>   static void fdc_pre_save(void *opaque)
> diff --git a/hw/rtl8139.c b/hw/rtl8139.c
> index 5214b8c..11951f2 100644
> --- a/hw/rtl8139.c
> +++ b/hw/rtl8139.c
> @@ -505,9 +505,6 @@ typedef struct RTL8139State {
>       /* PCI interrupt timer */
>       QEMUTimer *timer;
>       int64_t TimerExpire;
> -
> -    /* Support migration to/from old versions */
> -    int rtl8139_mmio_io_addr_dummy;
>   } RTL8139State;
>
>   static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time);
> @@ -3259,21 +3256,6 @@ static int rtl8139_post_load(void *opaque, int version_id)
>       return 0;
>   }
>
> -static bool rtl8139_hotplug_ready_needed(void *opaque)
> -{
> -    return qdev_machine_modified();
> -}
> -
> -static const VMStateDescription vmstate_rtl8139_hotplug_ready ={
> -    .name = "rtl8139/hotplug_ready",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> -    .minimum_version_id_old = 1,
> -    .fields      = (VMStateField []) {
> -        VMSTATE_END_OF_LIST()
> -    }
> -};
> -
>   static void rtl8139_pre_save(void *opaque)
>   {
>       RTL8139State* s = opaque;
> @@ -3283,14 +3265,13 @@ static void rtl8139_pre_save(void *opaque)
>       rtl8139_set_next_tctr_time(s, current_time);
>       s->TCTR = muldiv64(current_time - s->TCTR_base, PCI_FREQUENCY,
>                          get_ticks_per_sec());
> -    s->rtl8139_mmio_io_addr_dummy = s->rtl8139_mmio_io_addr;
>   }
>
>   static const VMStateDescription vmstate_rtl8139 = {
>       .name = "rtl8139",
> -    .version_id = 4,
> -    .minimum_version_id = 3,
> -    .minimum_version_id_old = 3,
> +    .version_id = 5,
> +    .minimum_version_id = 5,
> +    .minimum_version_id_old = 5,
>       .post_load = rtl8139_post_load,
>       .pre_save  = rtl8139_pre_save,
>       .fields      = (VMStateField []) {
> @@ -3336,7 +3317,6 @@ static const VMStateDescription vmstate_rtl8139 = {
>
>           VMSTATE_UNUSED(4),
>           VMSTATE_MACADDR(conf.macaddr, RTL8139State),
> -        VMSTATE_INT32(rtl8139_mmio_io_addr_dummy, RTL8139State),
>
>           VMSTATE_UINT32(currTxDesc, RTL8139State),
>           VMSTATE_UINT32(currCPlusRxDesc, RTL8139State),
> @@ -3366,14 +3346,6 @@ static const VMStateDescription vmstate_rtl8139 = {
>           VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
>           VMSTATE_END_OF_LIST()
>       },
> -    .subsections = (VMStateSubsection []) {
> -        {
> -            .vmsd =&vmstate_rtl8139_hotplug_ready,
> -            .needed = rtl8139_hotplug_ready_needed,
> -        }, {
> -            /* empty */
> -        }
> -    }
>   };
>
>   /***********************************************************/
Juan Quintela - Aug. 2, 2011, 11:25 p.m.
Anthony Liguori <aliguori@us.ibm.com> wrote:
> As Paolo points out, the migration protocol is ambiguous when using subsections
> today.  That means that even if we preserve subsections and change the protocol
> accordingly, the old protocol w/subsections is still ambiguous.
>
> Remove subsection usage and bump any device using subsections.  This effectively
> eliminates the amiguouity and allows for a clean transition to a new protocol
> with unambiguous subsections.

If you are doing that, you can just remove the old protocol altogether.
You are missing also ide on your patch.

If we remove migration from any machine that uses ide + floppy, we can
just ban migration from old versions altogether.  Only platform that
ever cares about forward compatiblity is pc's.  And pc's always use
floppies on qemu.

I still think that improving the "subsection" match is the way to go for 0.15.

Later, Juan.
Anthony Liguori - Aug. 3, 2011, 12:12 a.m.
On 08/02/2011 06:25 PM, Juan Quintela wrote:
> Anthony Liguori<aliguori@us.ibm.com>  wrote:
>> As Paolo points out, the migration protocol is ambiguous when using subsections
>> today.  That means that even if we preserve subsections and change the protocol
>> accordingly, the old protocol w/subsections is still ambiguous.
>>
>> Remove subsection usage and bump any device using subsections.  This effectively
>> eliminates the amiguouity and allows for a clean transition to a new protocol
>> with unambiguous subsections.
>
> If you are doing that, you can just remove the old protocol altogether.

The fundamental problem is that newer QEMUs generate migration state 
that is ambiguous to older QEMUs.

No matter what, we have to change the migration protocol in some way as 
to make it impossible to migrate from newer QEMUs to older QEMUs.

Even with subsections in the older protocol, newer QEMUs (provided we 
don't add more subsections), can still read the old protocol.

Paolo's proposed changes make newer QEMUs use a new protocol.  It's 
still possible to read the older protocol.  This means that you can't 
migrate new to old, but can migrate old to new.

I've poured over these patches in great detail and the changes are just 
too much right now.  Just the ram_addr_t change which now conflicts has 
a non-trivial resolution due to a related Xen change.

So my thinking is to be a bit more conservative.  If we bump the version 
number for 0.15.0, we make sure that we don't allow new -> old 
migration.  We will break old -> new migration, but we can fix that 
(including in the stable series) by adding special handling of the 
previous version.

Fixing new->old is the critical bit here.  We can resolve old->new as a 
stable update.

> You are missing also ide on your patch.

Thanks for catching that.

>  Only platform that
> ever cares about forward compatiblity is pc's.  And pc's always use
> floppies on qemu.
>
> I still think that improving the "subsection" match is the way to go for 0.15.

This series was just too late for 0.15.  I can close to suggesting that 
we delay 0.15 in order to give this time to be tested thoroughly but I 
think my proposal is a reasonable compromise.

Regards,

Anthony Liguori

>
> Later, Juan.
>
>
Paolo Bonzini - Aug. 3, 2011, 6:44 a.m.
On 08/03/2011 02:12 AM, Anthony Liguori wrote:
> Paolo's proposed changes make newer QEMUs use a new protocol.  It's
> still possible to read the older protocol.  This means that you can't
> migrate new to old, but can migrate old to new.

Not really true, with my series you can migrate new to old if you use 
pc-0.xx as the machine on the source too.

Paolo
Juan Quintela - Aug. 3, 2011, 9 a.m.
Anthony Liguori <aliguori@us.ibm.com> wrote:
> On 08/02/2011 06:25 PM, Juan Quintela wrote:
>> Anthony Liguori<aliguori@us.ibm.com>  wrote:
>>> As Paolo points out, the migration protocol is ambiguous when using subsections
>>> today.  That means that even if we preserve subsections and change the protocol
>>> accordingly, the old protocol w/subsections is still ambiguous.
>>>
>>> Remove subsection usage and bump any device using subsections.  This effectively
>>> eliminates the amiguouity and allows for a clean transition to a new protocol
>>> with unambiguous subsections.
>>
>> If you are doing that, you can just remove the old protocol altogether.
>
> The fundamental problem is that newer QEMUs generate migration state
> that is ambiguous to older QEMUs.
>
> No matter what, we have to change the migration protocol in some way
> as to make it impossible to migrate from newer QEMUs to older QEMUs.
>
> Even with subsections in the older protocol, newer QEMUs (provided we
> don't add more subsections), can still read the old protocol.
>
> Paolo's proposed changes make newer QEMUs use a new protocol.  It's
> still possible to read the older protocol.  This means that you can't
> migrate new to old, but can migrate old to new.
>
> I've poured over these patches in great detail and the changes are
> just too much right now.  Just the ram_addr_t change which now
> conflicts has a non-trivial resolution due to a related Xen change.
>
> So my thinking is to be a bit more conservative.  If we bump the
> version number for 0.15.0, we make sure that we don't allow new -> old
> migration.  We will break old -> new migration, but we can fix that
> (including in the stable series) by adding special handling of the
> previous version.
>
> Fixing new->old is the critical bit here.  We can resolve old->new as
> a stable update.

We are making something that is incompatible.  If we don't care about
breaking 0.14 -> 0.15 migration.  Just add Paolo version, and drop
altogether the old protocol.  It will give us exactly the same result,
new versions work, old versions fail.

>> You are missing also ide on your patch.
>
> Thanks for catching that.
>
>>  Only platform that
>> ever cares about forward compatiblity is pc's.  And pc's always use
>> floppies on qemu.
>>
>> I still think that improving the "subsection" match is the way to go for 0.15.
>
> This series was just too late for 0.15.  I can close to suggesting
> that we delay 0.15 in order to give this time to be tested thoroughly
> but I think my proposal is a reasonable compromise.

I think it is anything except reasonable.  From my point on view (and I
am biased), it is the equivalent of finding a corner case broken on
qcow2 and make all _OLD_ qcow2 images unreadable.  It will work, but it
is anything except reasonable.  As said, everything uses an fdc.
Furthermore, the _two_ things that you change don't matter in the big
scheme of things.  The subsections that fail are the ones that can
appear in the middle of another section.  The ones that appear at the
end of a real section work perfectly well with this protocol (a.k.a. the
ones that are broken are IDE), floppy and rtl8139 are ok with current
protocol.

Later, Juan.
Anthony Liguori - Aug. 3, 2011, 5:44 p.m.
On 08/03/2011 01:44 AM, Paolo Bonzini wrote:
> On 08/03/2011 02:12 AM, Anthony Liguori wrote:
>> Paolo's proposed changes make newer QEMUs use a new protocol. It's
>> still possible to read the older protocol. This means that you can't
>> migrate new to old, but can migrate old to new.
>
> Not really true, with my series you can migrate new to old if you use
> pc-0.xx as the machine on the source too.

But that is dangerous, no?   It suffers from the same problem with 
subsections.

Regards,

Anthony Liguori

> Paolo
>
Anthony Liguori - Aug. 3, 2011, 5:49 p.m.
On 08/03/2011 04:00 AM, Juan Quintela wrote:
> Anthony Liguori<aliguori@us.ibm.com>  wrote:
>> So my thinking is to be a bit more conservative.  If we bump the
>> version number for 0.15.0, we make sure that we don't allow new ->  old
>> migration.  We will break old ->  new migration, but we can fix that
>> (including in the stable series) by adding special handling of the
>> previous version.
>>
>> Fixing new->old is the critical bit here.  We can resolve old->new as
>> a stable update.
>
> We are making something that is incompatible.

Indeed.

>  If we don't care about
> breaking 0.14 ->  0.15 migration.  Just add Paolo version, and drop
> altogether the old protocol.  It will give us exactly the same result,
> new versions work, old versions fail.

I don't have a problem with Paolo's new protocol.  In fact, I'm strong 
in favor of applying it to master.  But I don't like the idea of adding 
a new migration protocol with no testing in master before putting it in 
a release.

>>
>> This series was just too late for 0.15.  I can close to suggesting
>> that we delay 0.15 in order to give this time to be tested thoroughly
>> but I think my proposal is a reasonable compromise.
>
> I think it is anything except reasonable.  From my point on view (and I
> am biased), it is the equivalent of finding a corner case broken on
> qcow2 and make all _OLD_ qcow2 images unreadable.  It will work, but it
> is anything except reasonable.  As said, everything uses an fdc.
> Furthermore, the _two_ things that you change don't matter in the big
> scheme of things.  The subsections that fail are the ones that can
> appear in the middle of another section.  The ones that appear at the
> end of a real section work perfectly well with this protocol (a.k.a. the
> ones that are broken are IDE), floppy and rtl8139 are ok with current
> protocol.

I can certainly limit the change to IDE if we think machine, floppy, and 
rtl8139 are safe.

My main concern is fixing the corruption during migration for the 
release.  Once that is fixed, we can revisit compatibility for the 
stable branch (by introducing a compatibility path for the older version).

Regards,

Anthony Liguori

>
> Later, Juan.
>
Juan Quintela - Aug. 3, 2011, 9:42 p.m.
Anthony Liguori <anthony@codemonkey.ws> wrote:
> On 08/03/2011 04:00 AM, Juan Quintela wrote:

> I don't have a problem with Paolo's new protocol.  In fact, I'm strong
> in favor of applying it to master.  But I don't like the idea of
> adding a new migration protocol with no testing in master before
> putting it in a release.

I have.  If we are changing a protocol in an incompatible version, we
can remove a lot of warts that current descriptions have.  Not that
Paolo protocol is bad, but if we are going to do some change, adding
things like size, removing previous warts, etc is the way to go.

>
>>>
>>> This series was just too late for 0.15.  I can close to suggesting
>>> that we delay 0.15 in order to give this time to be tested thoroughly
>>> but I think my proposal is a reasonable compromise.
>>
>> I think it is anything except reasonable.  From my point on view (and I
>> am biased), it is the equivalent of finding a corner case broken on
>> qcow2 and make all _OLD_ qcow2 images unreadable.  It will work, but it
>> is anything except reasonable.  As said, everything uses an fdc.
>> Furthermore, the _two_ things that you change don't matter in the big
>> scheme of things.  The subsections that fail are the ones that can
>> appear in the middle of another section.  The ones that appear at the
>> end of a real section work perfectly well with this protocol (a.k.a. the
>> ones that are broken are IDE), floppy and rtl8139 are ok with current
>> protocol.
>
> I can certainly limit the change to IDE if we think machine, floppy,
> and rtl8139 are safe.

Ok, only IDE is broken, something done if we are not reverting the others.

> My main concern is fixing the corruption during migration for the
> release.  Once that is fixed, we can revisit compatibility for the
> stable branch (by introducing a compatibility path for the older
> version).

It is not possible to introduce that later.  I very much prefer leave
things as they are, and add an stricter subsection test later.

Later, Juan.
Paolo Bonzini - Aug. 4, 2011, 7:17 a.m.
On 08/03/2011 07:44 PM, Anthony Liguori wrote:
>>>
>>
>> Not really true, with my series you can migrate new to old if you use
>> pc-0.xx as the machine on the source too.
>
> But that is dangerous, no?   It suffers from the same problem with
> subsections.

Not in the common case where the source does not publish the subsection.

Paolo
Paolo Bonzini - Aug. 4, 2011, 7:22 a.m.
On 08/03/2011 11:42 PM, Juan Quintela wrote:
> >  I can certainly limit the change to IDE if we think machine, floppy,
> >  and rtl8139 are safe.
>
> Ok, only IDE is broken, something done if we are not reverting the others.

Floppy is broken too, and has the problem that the subsection is sent 
almost always.  On the other hand IDE is only broken if we have 
subsections, and that is a rare case.

So perhaps the best course of action is to only fix floppy, either as in 
your patch or by reverting 7d905f71.  Let's keep IDE as it is broken as 
0.14 was, and apply my patch to master.  I'll rebase as soon as there is 
agreement this is the way to go.

Paolo
Kevin Wolf - Aug. 4, 2011, 12:59 p.m.
Am 03.08.2011 23:42, schrieb Juan Quintela:
> Anthony Liguori <anthony@codemonkey.ws> wrote:
>> On 08/03/2011 04:00 AM, Juan Quintela wrote:
> 
>> I don't have a problem with Paolo's new protocol.  In fact, I'm strong
>> in favor of applying it to master.  But I don't like the idea of
>> adding a new migration protocol with no testing in master before
>> putting it in a release.
> 
> I have.  If we are changing a protocol in an incompatible version, we
> can remove a lot of warts that current descriptions have.  Not that
> Paolo protocol is bad, but if we are going to do some change, adding
> things like size, removing previous warts, etc is the way to go.

So how about stating clearly that migrating between 0.x and 1.x won't
work and using the next few months to develop a sane migration protocol?

We'll still have to do something about 0.15 and it's not very nice to
break migration twice, but seems there is no way around it.

Kevin
Anthony Liguori - Aug. 4, 2011, 1:12 p.m.
On 08/04/2011 07:59 AM, Kevin Wolf wrote:
> Am 03.08.2011 23:42, schrieb Juan Quintela:
>> Anthony Liguori<anthony@codemonkey.ws>  wrote:
>>> On 08/03/2011 04:00 AM, Juan Quintela wrote:
>>
>>> I don't have a problem with Paolo's new protocol.  In fact, I'm strong
>>> in favor of applying it to master.  But I don't like the idea of
>>> adding a new migration protocol with no testing in master before
>>> putting it in a release.
>>
>> I have.  If we are changing a protocol in an incompatible version, we
>> can remove a lot of warts that current descriptions have.  Not that
>> Paolo protocol is bad, but if we are going to do some change, adding
>> things like size, removing previous warts, etc is the way to go.
>
> So how about stating clearly that migrating between 0.x and 1.x won't
> work and using the next few months to develop a sane migration protocol?

I'm all for it.

Regards,

Anthony Liguori

>
> We'll still have to do something about 0.15 and it's not very nice to
> break migration twice, but seems there is no way around it.
>
> Kevin
>
Anthony Liguori - Aug. 4, 2011, 1:20 p.m.
On 08/04/2011 02:22 AM, Paolo Bonzini wrote:
> On 08/03/2011 11:42 PM, Juan Quintela wrote:
>> > I can certainly limit the change to IDE if we think machine, floppy,
>> > and rtl8139 are safe.
>>
>> Ok, only IDE is broken, something done if we are not reverting the
>> others.
>
> Floppy is broken too, and has the problem that the subsection is sent
> almost always. On the other hand IDE is only broken if we have
> subsections, and that is a rare case.
>
> So perhaps the best course of action is to only fix floppy, either as in
> your patch or by reverting 7d905f71. Let's keep IDE as it is broken as
> 0.14 was, and apply my patch to master.


Hrm, so the idea is to keep IDE broken but not worry because it is so 
rare?  Why not just force the subsection to be sent?

I'm strongly in favor of bumping the version number of any broken device 
for 0.15, applying Paolo's patches to master, converting to Visitors, 
and then implementing a new protocol with better (perhaps ASN.1).

I think we can do this without touching very much code at all and in 
such a way that we don't have to worry about major regressions.

Regards,

Anthony Liguori

  I'll rebase as soon as there is
> agreement this is the way to go.
>
> Paolo
>
Kevin Wolf - Aug. 4, 2011, 2:09 p.m.
Am 04.08.2011 15:20, schrieb Anthony Liguori:
> On 08/04/2011 02:22 AM, Paolo Bonzini wrote:
>> On 08/03/2011 11:42 PM, Juan Quintela wrote:
>>>> I can certainly limit the change to IDE if we think machine, floppy,
>>>> and rtl8139 are safe.
>>>
>>> Ok, only IDE is broken, something done if we are not reverting the
>>> others.
>>
>> Floppy is broken too, and has the problem that the subsection is sent
>> almost always. On the other hand IDE is only broken if we have
>> subsections, and that is a rare case.
>>
>> So perhaps the best course of action is to only fix floppy, either as in
>> your patch or by reverting 7d905f71. Let's keep IDE as it is broken as
>> 0.14 was, and apply my patch to master.
> 
> 
> Hrm, so the idea is to keep IDE broken but not worry because it is so 
> rare?  Why not just force the subsection to be sent?

Wouldn't that only introduce more broken cases in which the destination
thinks it has successfully read the whole VM state when in fact it has
only been confused by a subsection?

Kevin
Anthony Liguori - Aug. 4, 2011, 2:30 p.m.
On 08/04/2011 09:09 AM, Kevin Wolf wrote:
> Am 04.08.2011 15:20, schrieb Anthony Liguori:
>> On 08/04/2011 02:22 AM, Paolo Bonzini wrote:
>>> On 08/03/2011 11:42 PM, Juan Quintela wrote:
>>>>> I can certainly limit the change to IDE if we think machine, floppy,
>>>>> and rtl8139 are safe.
>>>>
>>>> Ok, only IDE is broken, something done if we are not reverting the
>>>> others.
>>>
>>> Floppy is broken too, and has the problem that the subsection is sent
>>> almost always. On the other hand IDE is only broken if we have
>>> subsections, and that is a rare case.
>>>
>>> So perhaps the best course of action is to only fix floppy, either as in
>>> your patch or by reverting 7d905f71. Let's keep IDE as it is broken as
>>> 0.14 was, and apply my patch to master.
>>
>>
>> Hrm, so the idea is to keep IDE broken but not worry because it is so
>> rare?  Why not just force the subsection to be sent?
>
> Wouldn't that only introduce more broken cases in which the destination
> thinks it has successfully read the whole VM state when in fact it has
> only been confused by a subsection?

Force the subsection to be sent and increment the version number.

Regards,

Anthony Liguori

>
> Kevin
>
Paolo Bonzini - Aug. 4, 2011, 2:36 p.m.
On 08/04/2011 02:59 PM, Kevin Wolf wrote:
>> >
>> >  I have.  If we are changing a protocol in an incompatible version, we
>> >  can remove a lot of warts that current descriptions have.  Not that
>> >  Paolo protocol is bad, but if we are going to do some change, adding
>> >  things like size, removing previous warts, etc is the way to go.
> So how about stating clearly that migrating between 0.x and 1.x won't
> work and using the next few months to develop a sane migration protocol?

I think that's too early to state, perhaps we can keep backwards 
compatibility---who knows.

And in case we totally screw up in 1.x, we still have my tweaked format 
as a backup.

> We'll still have to do something about 0.15 and it's not very nice to
> break migration twice, but seems there is no way around it.

There is!  Migration didn't break _that_ much with subsections, exactly 
because the buggy case occurs when subsections are sent and subsections 
are rare.  If we always send them, as we did in practice with the 
floppy, we break stuff.  That's why for 0.15 reverting the floppy fix is 
the simplest course of action.

Paolo
Paolo Bonzini - Aug. 4, 2011, 3:07 p.m.
On 08/04/2011 04:30 PM, Anthony Liguori wrote:
> Force the subsection to be sent and increment the version number.

That works if you want the smallest patch that fixes the bug.  It 
doesn't if you want something that is useful, too.

Paolo
Anthony Liguori - Aug. 4, 2011, 8:31 p.m.
On 08/04/2011 02:22 AM, Paolo Bonzini wrote:
> On 08/03/2011 11:42 PM, Juan Quintela wrote:
>> > I can certainly limit the change to IDE if we think machine, floppy,
>> > and rtl8139 are safe.
>>
>> Ok, only IDE is broken, something done if we are not reverting the
>> others.
>
> Floppy is broken too, and has the problem that the subsection is sent
> almost always. On the other hand IDE is only broken if we have
> subsections, and that is a rare case.
>
> So perhaps the best course of action is to only fix floppy, either as in
> your patch or by reverting 7d905f71. Let's keep IDE as it is broken as
> 0.14 was, and apply my patch to master. I'll rebase as soon as there is
> agreement this is the way to go.

Okay, I'm on board and this is what I'm going to do.

Regards,

Anthony Liguori

>
> Paolo
>

Patch

diff --git a/hw/fdc.c b/hw/fdc.c
index edf0360..d8d74c9 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -554,45 +554,20 @@  static int fdrive_media_changed_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static bool fdrive_media_changed_needed(void *opaque)
-{
-    FDrive *drive = opaque;
-
-    return (drive->bs != NULL && drive->bs->media_changed != 1);
-}
-
-static const VMStateDescription vmstate_fdrive_media_changed = {
-    .name = "fdrive/media_changed",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .pre_save = fdrive_media_changed_pre_save,
-    .post_load = fdrive_media_changed_post_load,
-    .fields      = (VMStateField[]) {
-        VMSTATE_UINT8(media_changed, FDrive),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static const VMStateDescription vmstate_fdrive = {
     .name = "fdrive",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
+    .pre_save = fdrive_media_changed_pre_save,
+    .post_load = fdrive_media_changed_post_load,
     .fields      = (VMStateField[]) {
         VMSTATE_UINT8(head, FDrive),
         VMSTATE_UINT8(track, FDrive),
         VMSTATE_UINT8(sect, FDrive),
+        VMSTATE_UINT8(media_changed, FDrive),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_fdrive_media_changed,
-            .needed = &fdrive_media_changed_needed,
-        } , {
-            /* empty */
-        }
-    }
 };
 
 static void fdc_pre_save(void *opaque)
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 5214b8c..11951f2 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -505,9 +505,6 @@  typedef struct RTL8139State {
     /* PCI interrupt timer */
     QEMUTimer *timer;
     int64_t TimerExpire;
-
-    /* Support migration to/from old versions */
-    int rtl8139_mmio_io_addr_dummy;
 } RTL8139State;
 
 static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time);
@@ -3259,21 +3256,6 @@  static int rtl8139_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static bool rtl8139_hotplug_ready_needed(void *opaque)
-{
-    return qdev_machine_modified();
-}
-
-static const VMStateDescription vmstate_rtl8139_hotplug_ready ={
-    .name = "rtl8139/hotplug_ready",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField []) {
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static void rtl8139_pre_save(void *opaque)
 {
     RTL8139State* s = opaque;
@@ -3283,14 +3265,13 @@  static void rtl8139_pre_save(void *opaque)
     rtl8139_set_next_tctr_time(s, current_time);
     s->TCTR = muldiv64(current_time - s->TCTR_base, PCI_FREQUENCY,
                        get_ticks_per_sec());
-    s->rtl8139_mmio_io_addr_dummy = s->rtl8139_mmio_io_addr;
 }
 
 static const VMStateDescription vmstate_rtl8139 = {
     .name = "rtl8139",
-    .version_id = 4,
-    .minimum_version_id = 3,
-    .minimum_version_id_old = 3,
+    .version_id = 5,
+    .minimum_version_id = 5,
+    .minimum_version_id_old = 5,
     .post_load = rtl8139_post_load,
     .pre_save  = rtl8139_pre_save,
     .fields      = (VMStateField []) {
@@ -3336,7 +3317,6 @@  static const VMStateDescription vmstate_rtl8139 = {
 
         VMSTATE_UNUSED(4),
         VMSTATE_MACADDR(conf.macaddr, RTL8139State),
-        VMSTATE_INT32(rtl8139_mmio_io_addr_dummy, RTL8139State),
 
         VMSTATE_UINT32(currTxDesc, RTL8139State),
         VMSTATE_UINT32(currCPlusRxDesc, RTL8139State),
@@ -3366,14 +3346,6 @@  static const VMStateDescription vmstate_rtl8139 = {
         VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_rtl8139_hotplug_ready,
-            .needed = rtl8139_hotplug_ready_needed,
-        }, {
-            /* empty */
-        }
-    }
 };
 
 /***********************************************************/