Message ID | 1432214378-31891-4-git-send-email-kwolf@redhat.com |
---|---|
State | New |
Headers | show |
On 05/21/2015 09:19 AM, Kevin Wolf wrote: > The floppy controller spec describes three different controller phases, > which are currently not explicitly modelled in our emulation. Instead, > each phase is represented by a combination of flags in registers. > > This patch makes explicit in which phase the controller currently is. > > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > --- > hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 89 insertions(+) > > diff --git a/hw/block/fdc.c b/hw/block/fdc.c > index 8c41434..f5bcf0b 100644 > --- a/hw/block/fdc.c > +++ b/hw/block/fdc.c > @@ -495,6 +495,33 @@ enum { > FD_DIR_DSKCHG = 0x80, > }; > > +/* > + * See chapter 5.0 "Controller phases" of the spec: > + * > + * Command phase: > + * The host writes a command and its parameters into the FIFO. The command > + * phase is completed when all parameters for the command have been supplied, > + * and execution phase is entered. > + * > + * Execution phase: > + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO > + * contains the payload now, otherwise it's unused. When all bytes of the > + * required data have been transferred, the state is switched to either result > + * phase (if the command produces status bytes) or directly back into the > + * command phase for the next command. > + * > + * Result phase: > + * The host reads out the FIFO, which contains one or more result bytes now. > + */ > +enum { > + /* Only for migration: reconstruct phase from registers like qemu 2.3 */ > + FD_PHASE_RECONSTRUCT = 0, > + > + FD_PHASE_COMMAND = 1, > + FD_PHASE_EXECUTION = 2, > + FD_PHASE_RESULT = 3, > +}; > + > #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) > #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) > > @@ -504,6 +531,7 @@ struct FDCtrl { > /* Controller state */ > QEMUTimer *result_timer; > int dma_chann; > + uint8_t phase; > /* Controller's identification */ > uint8_t version; > /* HW */ > @@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = { > } > }; > > +/* > + * Reconstructs the phase from register values according to the logic that was > + * implemented in qemu 2.3. This is the default value that is used if the phase > + * subsection is not present on migration. > + * > + * Don't change this function to reflect newer qemu versions, it is part of > + * the migration ABI. > + */ > +static int reconstruct_phase(FDCtrl *fdctrl) > +{ > + if (fdctrl->msr & FD_MSR_NONDMA) { > + return FD_PHASE_EXECUTION; > + } else if ((fdctrl->msr & FD_MSR_RQM) == 0) { > + /* qemu 2.3 disabled RQM only during DMA transfers */ > + return FD_PHASE_EXECUTION; > + } else if (fdctrl->msr & FD_MSR_DIO) { > + return FD_PHASE_RESULT; > + } else { > + return FD_PHASE_COMMAND; > + } > +} > + > static void fdc_pre_save(void *opaque) > { > FDCtrl *s = opaque; > @@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque) > s->dor_vmstate = s->dor | GET_CUR_DRV(s); > } > > +static int fdc_pre_load(void *opaque) > +{ > + FDCtrl *s = opaque; > + s->phase = FD_PHASE_RECONSTRUCT; > + return 0; > +} > + > static int fdc_post_load(void *opaque, int version_id) > { > FDCtrl *s = opaque; > > SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK); > s->dor = s->dor_vmstate & ~FD_DOR_SELMASK; > + > + if (s->phase == FD_PHASE_RECONSTRUCT) { > + s->phase = reconstruct_phase(s); > + } > + > return 0; > } > > @@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = { > } > }; > > +static bool fdc_phase_needed(void *opaque) > +{ > + FDCtrl *fdctrl = opaque; > + > + return reconstruct_phase(fdctrl) != fdctrl->phase; > +} > + > +static const VMStateDescription vmstate_fdc_phase = { > + .name = "fdc/phase", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT8(phase, FDCtrl), > + VMSTATE_END_OF_LIST() > + } > +}; > + > static const VMStateDescription vmstate_fdc = { > .name = "fdc", > .version_id = 2, > .minimum_version_id = 2, > .pre_save = fdc_pre_save, > + .pre_load = fdc_pre_load, > .post_load = fdc_post_load, > .fields = (VMStateField[]) { > /* Controller State */ > @@ -839,6 +919,9 @@ static const VMStateDescription vmstate_fdc = { > .vmsd = &vmstate_fdc_result_timer, > .needed = fdc_result_timer_needed, > } , { > + .vmsd = &vmstate_fdc_phase, > + .needed = fdc_phase_needed, > + } , { > /* empty */ > } > } > @@ -1137,6 +1220,7 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl) > /* Clear the FIFO and update the state for receiving the next command */ > static void fdctrl_to_command_phase(FDCtrl *fdctrl) > { > + fdctrl->phase = FD_PHASE_COMMAND; > fdctrl->data_dir = FD_DIR_WRITE; > fdctrl->data_pos = 0; > fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO); > @@ -1146,6 +1230,7 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl) > * @fifo_len is the number of result bytes to be read out. */ > static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len) > { > + fdctrl->phase = FD_PHASE_RESULT; > fdctrl->data_dir = FD_DIR_READ; > fdctrl->data_len = fifo_len; > fdctrl->data_pos = 0; > @@ -1912,6 +1997,9 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction) > fdctrl_raise_irq(fdctrl); > } > > +/* > + * Handlers for the execution phase of each command > + */ > static const struct { > uint8_t value; > uint8_t mask; > @@ -2015,6 +2103,7 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) > /* We now have all parameters > * and will be able to treat the command > */ > + fdctrl->phase = FD_PHASE_EXECUTION; > if (fdctrl->data_state & FD_STATE_FORMAT) { > fdctrl_format_sector(fdctrl); > return; > Acked-by: John Snow <jsnow@redhat.com> Looks ok to me, CC David Gilbert for a migration look-see. --js
* John Snow (jsnow@redhat.com) wrote: > > > On 05/21/2015 09:19 AM, Kevin Wolf wrote: > > The floppy controller spec describes three different controller phases, > > which are currently not explicitly modelled in our emulation. Instead, > > each phase is represented by a combination of flags in registers. > > > > This patch makes explicit in which phase the controller currently is. > > > > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > --- > > hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 89 insertions(+) > > > > diff --git a/hw/block/fdc.c b/hw/block/fdc.c > > index 8c41434..f5bcf0b 100644 > > --- a/hw/block/fdc.c > > +++ b/hw/block/fdc.c > > @@ -495,6 +495,33 @@ enum { > > FD_DIR_DSKCHG = 0x80, > > }; > > > > +/* > > + * See chapter 5.0 "Controller phases" of the spec: > > + * > > + * Command phase: > > + * The host writes a command and its parameters into the FIFO. The command > > + * phase is completed when all parameters for the command have been supplied, > > + * and execution phase is entered. > > + * > > + * Execution phase: > > + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO > > + * contains the payload now, otherwise it's unused. When all bytes of the > > + * required data have been transferred, the state is switched to either result > > + * phase (if the command produces status bytes) or directly back into the > > + * command phase for the next command. > > + * > > + * Result phase: > > + * The host reads out the FIFO, which contains one or more result bytes now. > > + */ > > +enum { > > + /* Only for migration: reconstruct phase from registers like qemu 2.3 */ > > + FD_PHASE_RECONSTRUCT = 0, > > + > > + FD_PHASE_COMMAND = 1, > > + FD_PHASE_EXECUTION = 2, > > + FD_PHASE_RESULT = 3, > > +}; > > + > > #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) > > #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) > > > > @@ -504,6 +531,7 @@ struct FDCtrl { > > /* Controller state */ > > QEMUTimer *result_timer; > > int dma_chann; > > + uint8_t phase; > > /* Controller's identification */ > > uint8_t version; > > /* HW */ > > @@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = { > > } > > }; > > > > +/* > > + * Reconstructs the phase from register values according to the logic that was > > + * implemented in qemu 2.3. This is the default value that is used if the phase > > + * subsection is not present on migration. > > + * > > + * Don't change this function to reflect newer qemu versions, it is part of > > + * the migration ABI. > > + */ > > +static int reconstruct_phase(FDCtrl *fdctrl) > > +{ > > + if (fdctrl->msr & FD_MSR_NONDMA) { > > + return FD_PHASE_EXECUTION; > > + } else if ((fdctrl->msr & FD_MSR_RQM) == 0) { > > + /* qemu 2.3 disabled RQM only during DMA transfers */ > > + return FD_PHASE_EXECUTION; > > + } else if (fdctrl->msr & FD_MSR_DIO) { > > + return FD_PHASE_RESULT; > > + } else { > > + return FD_PHASE_COMMAND; > > + } > > +} > > + > > static void fdc_pre_save(void *opaque) > > { > > FDCtrl *s = opaque; > > @@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque) > > s->dor_vmstate = s->dor | GET_CUR_DRV(s); > > } > > > > +static int fdc_pre_load(void *opaque) > > +{ > > + FDCtrl *s = opaque; > > + s->phase = FD_PHASE_RECONSTRUCT; > > + return 0; > > +} > > + > > static int fdc_post_load(void *opaque, int version_id) > > { > > FDCtrl *s = opaque; > > > > SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK); > > s->dor = s->dor_vmstate & ~FD_DOR_SELMASK; > > + > > + if (s->phase == FD_PHASE_RECONSTRUCT) { > > + s->phase = reconstruct_phase(s); > > + } > > + > > return 0; > > } > > > > @@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = { > > } > > }; > > > > +static bool fdc_phase_needed(void *opaque) > > +{ > > + FDCtrl *fdctrl = opaque; > > + > > + return reconstruct_phase(fdctrl) != fdctrl->phase; > > +} OK, that is one way of doing it which should work, as long as most of the time that matches. My preference for subsections is normally to make them just conditional on machine type, that way backwards migration just works. However, if the result of the backwards migration would be data corruption (which if I understand what you saying it could in this case) then it's arguably better to fail migration in those cases. You might like to add a comment to that effect. Would I be correct in thinking that all our common OSs end up not sending this section while running and not accessing the floppy? Dave > > + > > +static const VMStateDescription vmstate_fdc_phase = { > > + .name = "fdc/phase", > > + .version_id = 1, > > + .minimum_version_id = 1, > > + .fields = (VMStateField[]) { > > + VMSTATE_UINT8(phase, FDCtrl), > > + VMSTATE_END_OF_LIST() > > + } > > +}; > > + > > static const VMStateDescription vmstate_fdc = { > > .name = "fdc", > > .version_id = 2, > > .minimum_version_id = 2, > > .pre_save = fdc_pre_save, > > + .pre_load = fdc_pre_load, > > .post_load = fdc_post_load, > > .fields = (VMStateField[]) { > > /* Controller State */ > > @@ -839,6 +919,9 @@ static const VMStateDescription vmstate_fdc = { > > .vmsd = &vmstate_fdc_result_timer, > > .needed = fdc_result_timer_needed, > > } , { > > + .vmsd = &vmstate_fdc_phase, > > + .needed = fdc_phase_needed, > > + } , { > > /* empty */ > > } > > } > > @@ -1137,6 +1220,7 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl) > > /* Clear the FIFO and update the state for receiving the next command */ > > static void fdctrl_to_command_phase(FDCtrl *fdctrl) > > { > > + fdctrl->phase = FD_PHASE_COMMAND; > > fdctrl->data_dir = FD_DIR_WRITE; > > fdctrl->data_pos = 0; > > fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO); > > @@ -1146,6 +1230,7 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl) > > * @fifo_len is the number of result bytes to be read out. */ > > static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len) > > { > > + fdctrl->phase = FD_PHASE_RESULT; > > fdctrl->data_dir = FD_DIR_READ; > > fdctrl->data_len = fifo_len; > > fdctrl->data_pos = 0; > > @@ -1912,6 +1997,9 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction) > > fdctrl_raise_irq(fdctrl); > > } > > > > +/* > > + * Handlers for the execution phase of each command > > + */ > > static const struct { > > uint8_t value; > > uint8_t mask; > > @@ -2015,6 +2103,7 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) > > /* We now have all parameters > > * and will be able to treat the command > > */ > > + fdctrl->phase = FD_PHASE_EXECUTION; > > if (fdctrl->data_state & FD_STATE_FORMAT) { > > fdctrl_format_sector(fdctrl); > > return; > > > > Acked-by: John Snow <jsnow@redhat.com> > > Looks ok to me, CC David Gilbert for a migration look-see. > > --js -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > * John Snow (jsnow@redhat.com) wrote: >> >> >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: >> > The floppy controller spec describes three different controller phases, >> > which are currently not explicitly modelled in our emulation. Instead, >> > each phase is represented by a combination of flags in registers. >> > >> > This patch makes explicit in which phase the controller currently is. >> > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> >> > --- >> > hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> > 1 file changed, 89 insertions(+) >> > >> > diff --git a/hw/block/fdc.c b/hw/block/fdc.c >> > index 8c41434..f5bcf0b 100644 >> > --- a/hw/block/fdc.c >> > +++ b/hw/block/fdc.c >> > @@ -495,6 +495,33 @@ enum { >> > FD_DIR_DSKCHG = 0x80, >> > }; >> > >> > +/* >> > + * See chapter 5.0 "Controller phases" of the spec: >> > + * >> > + * Command phase: >> > + * The host writes a command and its parameters into the FIFO. The command >> > + * phase is completed when all parameters for the command have been supplied, >> > + * and execution phase is entered. >> > + * >> > + * Execution phase: >> > + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO >> > + * contains the payload now, otherwise it's unused. When all bytes of the >> > + * required data have been transferred, the state is switched to either result >> > + * phase (if the command produces status bytes) or directly back into the >> > + * command phase for the next command. >> > + * >> > + * Result phase: >> > + * The host reads out the FIFO, which contains one or more result bytes now. >> > + */ >> > +enum { >> > + /* Only for migration: reconstruct phase from registers like qemu 2.3 */ >> > + FD_PHASE_RECONSTRUCT = 0, >> > + >> > + FD_PHASE_COMMAND = 1, >> > + FD_PHASE_EXECUTION = 2, >> > + FD_PHASE_RESULT = 3, >> > +}; >> > + >> > #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) >> > #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) >> > >> > @@ -504,6 +531,7 @@ struct FDCtrl { >> > /* Controller state */ >> > QEMUTimer *result_timer; >> > int dma_chann; >> > + uint8_t phase; >> > /* Controller's identification */ >> > uint8_t version; >> > /* HW */ >> > @@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = { >> > } >> > }; >> > >> > +/* >> > + * Reconstructs the phase from register values according to the logic that was >> > + * implemented in qemu 2.3. This is the default value that is used if the phase >> > + * subsection is not present on migration. >> > + * >> > + * Don't change this function to reflect newer qemu versions, it is part of >> > + * the migration ABI. >> > + */ >> > +static int reconstruct_phase(FDCtrl *fdctrl) >> > +{ >> > + if (fdctrl->msr & FD_MSR_NONDMA) { >> > + return FD_PHASE_EXECUTION; >> > + } else if ((fdctrl->msr & FD_MSR_RQM) == 0) { >> > + /* qemu 2.3 disabled RQM only during DMA transfers */ >> > + return FD_PHASE_EXECUTION; >> > + } else if (fdctrl->msr & FD_MSR_DIO) { >> > + return FD_PHASE_RESULT; >> > + } else { >> > + return FD_PHASE_COMMAND; >> > + } >> > +} >> > + >> > static void fdc_pre_save(void *opaque) >> > { >> > FDCtrl *s = opaque; >> > @@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque) >> > s->dor_vmstate = s->dor | GET_CUR_DRV(s); >> > } >> > >> > +static int fdc_pre_load(void *opaque) >> > +{ >> > + FDCtrl *s = opaque; >> > + s->phase = FD_PHASE_RECONSTRUCT; >> > + return 0; >> > +} >> > + >> > static int fdc_post_load(void *opaque, int version_id) >> > { >> > FDCtrl *s = opaque; >> > >> > SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK); >> > s->dor = s->dor_vmstate & ~FD_DOR_SELMASK; >> > + >> > + if (s->phase == FD_PHASE_RECONSTRUCT) { >> > + s->phase = reconstruct_phase(s); >> > + } >> > + >> > return 0; >> > } >> > >> > @@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = { >> > } >> > }; >> > >> > +static bool fdc_phase_needed(void *opaque) >> > +{ >> > + FDCtrl *fdctrl = opaque; >> > + >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; >> > +} > > OK, that is one way of doing it which should work, as long > as most of the time that matches. > > My preference for subsections is normally to make them just > conditional on machine type, that way backwards migration just > works. However, if the result of the backwards migration would > be data corruption (which if I understand what you saying it could > in this case) then it's arguably better to fail migration in those > cases. This isn't a matter of preference. Subsections were designed to solves a problem we only have because we're not perfect: device model bugs, namely forgetting to migrate a bit of state, or failing to model it in the first place. Subsections tied to machine types solve an entirely different problem: the old version of the device is just fine, including migration, but we now want a new and improved variant, which needs some more state migrated. Putting that piece of state into a subsection tied to the new machine type avoids code duplication. But what do you do for device model bugs serious enough to need fixing even for existing machine types, when the fix needs additional state migrated? Subsections tied to machine types are *not* a solution there. That's why this isn't about preference! It's about having a bug to fix or not. You seem rather reluctant to use subsections for their intended purpose. I don't understand; their correctness argument is really simple. Fundamentally, we're picking a pair of state serialization and deserialization functions (in the mathematical sense). The obvious encoding function for a state consisting of pieces is to encode all the pieces one by one. Call this e(), and its decoding function d(). Here's an alternative pair e'() and d'(): * Pick a piece of the state. * Pick one of its values. Let's call it "the prevalent value" for reasons that will become apparent shortly. * Encode just like e() does, except omit this piece of state if and only if it has its prevalent value. * In the decoding function, default this piece of state to its prevalent value. The encodings are equivalent: d(e(x)) = d'(e'(x)) for all x. At this point, you might want to tell me not to waste your time with trivial math. If so, good! The triviality is exactly my point. Straightforwardly translated to code, e() and d() put everything in the section. Drawback: add/remove/modify of state kills migration to and from older versions. e'() and d'() put a piece of state in a subsection that is only present when the piece doesn't have its prevalent value. Because the encodings are equivalent, forward migration transfers exactly the same state regardless of which of them we choose. So why not choose one that makes backward migration work exactly when it's safe? * Put all pieces of state the old version doesn't understand in subsections. * Identify prevalent state. * Double-check the old version behaves safely in prevalent state. * In prevalent state, no subsections are sent, and migration succeeds. * In any other state, migration fails. Naturally, this is useful only when the prevalent state is actually prevalent. > You might like to add a comment to that effect. > > Would I be correct in thinking that all our common OSs > end up not sending this section while running and not > accessing the floppy? Yes, the prevalent state better be prevalent.
* Markus Armbruster (armbru@redhat.com) wrote: > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > > > * John Snow (jsnow@redhat.com) wrote: > >> > >> > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: > >> > The floppy controller spec describes three different controller phases, > >> > which are currently not explicitly modelled in our emulation. Instead, > >> > each phase is represented by a combination of flags in registers. > >> > > >> > This patch makes explicit in which phase the controller currently is. > >> > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> <snip> > >> > +static bool fdc_phase_needed(void *opaque) > >> > +{ > >> > + FDCtrl *fdctrl = opaque; > >> > + > >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; > >> > +} > > > > OK, that is one way of doing it which should work, as long > > as most of the time that matches. > > > > My preference for subsections is normally to make them just > > conditional on machine type, that way backwards migration just > > works. However, if the result of the backwards migration would > > be data corruption (which if I understand what you saying it could > > in this case) then it's arguably better to fail migration in those > > cases. > > This isn't a matter of preference. > > Subsections were designed to solves a problem we only have because we're > not perfect: device model bugs, namely forgetting to migrate a bit of > state, or failing to model it in the first place. > > Subsections tied to machine types solve an entirely different problem: > the old version of the device is just fine, including migration, but we > now want a new and improved variant, which needs some more state > migrated. Putting that piece of state into a subsection tied to the new > machine type avoids code duplication. > > But what do you do for device model bugs serious enough to need fixing > even for existing machine types, when the fix needs additional state > migrated? Subsections tied to machine types are *not* a solution there. > That's why this isn't about preference! It's about having a bug to fix > or not. My problem is that people keep adding subsections to fix minor device bugs because they think breaking migration is irrelevant. If the bug being fixed causes data corruption then OK I agree it is probably better to break migration, otherwise you need to make a decision about whether fixing the device bug during migration is better or worse than having the migration fail. Some people say 'Migration failure is ok - people just try it again'; sorry, no, that's useless in environments where the VM has 100s of GB of RAM and takes hours to migrate only for it then to fail, and it was urgent that it was migrated off that source host. > You seem rather reluctant to use subsections for their intended purpose. > I don't understand; their correctness argument is really simple. Correct, I am; it encourages people to break migration compatibility too strongly. Now, since I have to worry about backwards-migration downstream this influences my choice. People keep telling me that upstream doesn't care about backwards migration compatibility, but I've not seen any evidence of that, indeed I've seen other people trying to put backwards migration fixes in. > Fundamentally, we're picking a pair of state serialization and > deserialization functions (in the mathematical sense). > > The obvious encoding function for a state consisting of pieces is to > encode all the pieces one by one. Call this e(), and its decoding > function d(). > > Here's an alternative pair e'() and d'(): > > * Pick a piece of the state. > > * Pick one of its values. Let's call it "the prevalent value" for > reasons that will become apparent shortly. > > * Encode just like e() does, except omit this piece of state if and only > if it has its prevalent value. > > * In the decoding function, default this piece of state to its prevalent > value. > > The encodings are equivalent: d(e(x)) = d'(e'(x)) for all x. > > At this point, you might want to tell me not to waste your time with > trivial math. If so, good! The triviality is exactly my point. Well I was going to mention it, but since you already have I wont bother.... Actually, the problem here is that the symbols look trivial but they're meaningless without the following textual description, and that's where the problems are. > Straightforwardly translated to code, e() and d() put everything in the > section. Drawback: add/remove/modify of state kills migration to and > from older versions. > > e'() and d'() put a piece of state in a subsection that is only present > when the piece doesn't have its prevalent value. > > Because the encodings are equivalent, forward migration transfers > exactly the same state regardless of which of them we choose. > > So why not choose one that makes backward migration work exactly when > it's safe? > > * Put all pieces of state the old version doesn't understand in > subsections. > > * Identify prevalent state. > > * Double-check the old version behaves safely in prevalent state. > > * In prevalent state, no subsections are sent, and migration succeeds. > > * In any other state, migration fails. > > Naturally, this is useful only when the prevalent state is actually > prevalent. The 'prevalent state' idea is what I hate about the use of subsections. It's very very difficult to reason about: 1) subtle changes in either the guest OS or the implementation can change what the 'prevalent state' is. 2) Are you sure that the prevalent state is the same on Windows n+1? 3) What about during boot? 4) What about during installation? It's then roulette as to whether a migration will work. It's also a nightmare to test, having bugs where migration fails 1/100 times on some random OS in a production environment is horrible, and that's exactly what the 'prevalent state' design produces. I work to the rule that: a) If the migration bug is such that the effect of the missing state is really bad; a hung guest, or data corruption - then sure use a subsection based on a 'prevalent state' decision. b) Else tie it to the machine type. c) A combination is even better; i.e. always send the state on new machine types, just because it makes the behaviour more consistent. > > You might like to add a comment to that effect. > > > > Would I be correct in thinking that all our common OSs > > end up not sending this section while running and not > > accessing the floppy? > > Yes, the prevalent state better be prevalent. Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben: > * Markus Armbruster (armbru@redhat.com) wrote: > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > > > > > * John Snow (jsnow@redhat.com) wrote: > > >> > > >> > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: > > >> > The floppy controller spec describes three different controller phases, > > >> > which are currently not explicitly modelled in our emulation. Instead, > > >> > each phase is represented by a combination of flags in registers. > > >> > > > >> > This patch makes explicit in which phase the controller currently is. > > >> > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > <snip> > > > >> > +static bool fdc_phase_needed(void *opaque) > > >> > +{ > > >> > + FDCtrl *fdctrl = opaque; > > >> > + > > >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; > > >> > +} > > > > > > OK, that is one way of doing it which should work, as long > > > as most of the time that matches. > > > > > > My preference for subsections is normally to make them just > > > conditional on machine type, that way backwards migration just > > > works. However, if the result of the backwards migration would > > > be data corruption (which if I understand what you saying it could > > > in this case) then it's arguably better to fail migration in those > > > cases. > > > > This isn't a matter of preference. > > > > Subsections were designed to solves a problem we only have because we're > > not perfect: device model bugs, namely forgetting to migrate a bit of > > state, or failing to model it in the first place. > > > > Subsections tied to machine types solve an entirely different problem: > > the old version of the device is just fine, including migration, but we > > now want a new and improved variant, which needs some more state > > migrated. Putting that piece of state into a subsection tied to the new > > machine type avoids code duplication. > > > > But what do you do for device model bugs serious enough to need fixing > > even for existing machine types, when the fix needs additional state > > migrated? Subsections tied to machine types are *not* a solution there. > > That's why this isn't about preference! It's about having a bug to fix > > or not. > > My problem is that people keep adding subsections to fix minor device > bugs because they think breaking migration is irrelevant. If the bug > being fixed causes data corruption then OK I agree it is probably better > to break migration, otherwise you need to make a decision about whether > fixing the device bug during migration is better or worse than having > the migration fail. > Some people say 'Migration failure is ok - people just try it again'; > sorry, no, that's useless in environments where the VM has 100s of GB > of RAM and takes hours to migrate only for it then to fail, and it > was urgent that it was migrated off that source host. If this is a problem in practice, it sounds a lot like we need to improve our handling of that situation in general. Why do we abort the whole migration when serialising the state fails? Can't we abort just the completion and switch back to the live state, and then retry a bit later? Most, if not all, of the subsections you mentioned that fix minor bugs fix some states while the device is actively used and shorty after we should be back in a state where the subsection isn't needed. This is exactly the "fails 1 in 100 migrations" cases you talked about, and they would be practically fixed if you made a few more attempts to complete the migration before you let it fail (or perhaps even retry indefinitely, as long as the user doesn't cancel migration). Kevin
* Kevin Wolf (kwolf@redhat.com) wrote: > Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben: > > * Markus Armbruster (armbru@redhat.com) wrote: > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > > > > > > > * John Snow (jsnow@redhat.com) wrote: > > > >> > > > >> > > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: > > > >> > The floppy controller spec describes three different controller phases, > > > >> > which are currently not explicitly modelled in our emulation. Instead, > > > >> > each phase is represented by a combination of flags in registers. > > > >> > > > > >> > This patch makes explicit in which phase the controller currently is. > > > >> > > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > > > <snip> > > > > > >> > +static bool fdc_phase_needed(void *opaque) > > > >> > +{ > > > >> > + FDCtrl *fdctrl = opaque; > > > >> > + > > > >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; > > > >> > +} > > > > > > > > OK, that is one way of doing it which should work, as long > > > > as most of the time that matches. > > > > > > > > My preference for subsections is normally to make them just > > > > conditional on machine type, that way backwards migration just > > > > works. However, if the result of the backwards migration would > > > > be data corruption (which if I understand what you saying it could > > > > in this case) then it's arguably better to fail migration in those > > > > cases. > > > > > > This isn't a matter of preference. > > > > > > Subsections were designed to solves a problem we only have because we're > > > not perfect: device model bugs, namely forgetting to migrate a bit of > > > state, or failing to model it in the first place. > > > > > > Subsections tied to machine types solve an entirely different problem: > > > the old version of the device is just fine, including migration, but we > > > now want a new and improved variant, which needs some more state > > > migrated. Putting that piece of state into a subsection tied to the new > > > machine type avoids code duplication. > > > > > > But what do you do for device model bugs serious enough to need fixing > > > even for existing machine types, when the fix needs additional state > > > migrated? Subsections tied to machine types are *not* a solution there. > > > That's why this isn't about preference! It's about having a bug to fix > > > or not. > > > > My problem is that people keep adding subsections to fix minor device > > bugs because they think breaking migration is irrelevant. If the bug > > being fixed causes data corruption then OK I agree it is probably better > > to break migration, otherwise you need to make a decision about whether > > fixing the device bug during migration is better or worse than having > > the migration fail. > > Some people say 'Migration failure is ok - people just try it again'; > > sorry, no, that's useless in environments where the VM has 100s of GB > > of RAM and takes hours to migrate only for it then to fail, and it > > was urgent that it was migrated off that source host. > > If this is a problem in practice, it sounds a lot like we need to > improve our handling of that situation in general. Why do we abort > the whole migration when serialising the state fails? Can't we abort > just the completion and switch back to the live state, and then retry a > bit later? It's not the serialisation that fails, it's the deserialisation on the destination, and our migration stream is currently one way, so we have no way of signalling back to the source to 'just try that again'. So the migration fails, the source carries on running and we can only try again from the beginning. > Most, if not all, of the subsections you mentioned that fix minor bugs > fix some states while the device is actively used and shorty after we > should be back in a state where the subsection isn't needed. I think once any (non-iterative) device state has been sent we can't necessarily abandon and try again; I'm not confident that the devices would work if we sent one set of state and then tried to reload it later. I think that would need a further pass that went around all the devices and said 'is it OK to migrate now' and that would happen before any of the non-iterative devices were migrated. > This is exactly the "fails 1 in 100 migrations" cases you talked about, > and they would be practically fixed if you made a few more attempts to > complete the migration before you let it fail (or perhaps even retry > indefinitely, as long as the user doesn't cancel migration). This causes a problem if your belief in the safe-state to migrate doesn't happen on some new OS version, because then it will retry indefinitely. You'd know you didn't need to wait on the newer machine type, but you could still use this trick on the older machine type. (Of course if you knew you were going to a new QEMU that could always accept the new section then you wouldn't worry about this - but people dont like sending qemu versions). Dave > > Kevin -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Am 29.05.2015 um 11:38 hat Dr. David Alan Gilbert geschrieben: > * Kevin Wolf (kwolf@redhat.com) wrote: > > Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben: > > > * Markus Armbruster (armbru@redhat.com) wrote: > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > > > > > > > > > * John Snow (jsnow@redhat.com) wrote: > > > > >> > > > > >> > > > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: > > > > >> > The floppy controller spec describes three different controller phases, > > > > >> > which are currently not explicitly modelled in our emulation. Instead, > > > > >> > each phase is represented by a combination of flags in registers. > > > > >> > > > > > >> > This patch makes explicit in which phase the controller currently is. > > > > >> > > > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > > > > > <snip> > > > > > > > >> > +static bool fdc_phase_needed(void *opaque) > > > > >> > +{ > > > > >> > + FDCtrl *fdctrl = opaque; > > > > >> > + > > > > >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; > > > > >> > +} > > > > > > > > > > OK, that is one way of doing it which should work, as long > > > > > as most of the time that matches. > > > > > > > > > > My preference for subsections is normally to make them just > > > > > conditional on machine type, that way backwards migration just > > > > > works. However, if the result of the backwards migration would > > > > > be data corruption (which if I understand what you saying it could > > > > > in this case) then it's arguably better to fail migration in those > > > > > cases. > > > > > > > > This isn't a matter of preference. > > > > > > > > Subsections were designed to solves a problem we only have because we're > > > > not perfect: device model bugs, namely forgetting to migrate a bit of > > > > state, or failing to model it in the first place. > > > > > > > > Subsections tied to machine types solve an entirely different problem: > > > > the old version of the device is just fine, including migration, but we > > > > now want a new and improved variant, which needs some more state > > > > migrated. Putting that piece of state into a subsection tied to the new > > > > machine type avoids code duplication. > > > > > > > > But what do you do for device model bugs serious enough to need fixing > > > > even for existing machine types, when the fix needs additional state > > > > migrated? Subsections tied to machine types are *not* a solution there. > > > > That's why this isn't about preference! It's about having a bug to fix > > > > or not. > > > > > > My problem is that people keep adding subsections to fix minor device > > > bugs because they think breaking migration is irrelevant. If the bug > > > being fixed causes data corruption then OK I agree it is probably better > > > to break migration, otherwise you need to make a decision about whether > > > fixing the device bug during migration is better or worse than having > > > the migration fail. > > > Some people say 'Migration failure is ok - people just try it again'; > > > sorry, no, that's useless in environments where the VM has 100s of GB > > > of RAM and takes hours to migrate only for it then to fail, and it > > > was urgent that it was migrated off that source host. > > > > If this is a problem in practice, it sounds a lot like we need to > > improve our handling of that situation in general. Why do we abort > > the whole migration when serialising the state fails? Can't we abort > > just the completion and switch back to the live state, and then retry a > > bit later? > > It's not the serialisation that fails, it's the deserialisation on the destination, > and our migration stream is currently one way, so we have no way > of signalling back to the source to 'just try that again'. > So the migration fails, the source carries on running and we can only > try again from the beginning. True, forgot about that. So what if the management tool queried the destination for supported subsections and passed that as an (optional) argument to the migration command on the source? Then the source would have full control and could complete migration only when no unsupported subsection are needed. > > Most, if not all, of the subsections you mentioned that fix minor bugs > > fix some states while the device is actively used and shorty after we > > should be back in a state where the subsection isn't needed. > > I think once any (non-iterative) device state has been sent we can't > necessarily abandon and try again; I'm not confident that the devices > would work if we sent one set of state and then tried to reload it later. Why would serialising the device state change any device state? Don't you already continue running the source after sending non-iterative device state when migration fails on the destination? > I think that would need a further pass that went around all the devices and > said 'is it OK to migrate now' and that would happen before any of the > non-iterative devices were migrated. Yes, another pass is an option, too. > > This is exactly the "fails 1 in 100 migrations" cases you talked about, > > and they would be practically fixed if you made a few more attempts to > > complete the migration before you let it fail (or perhaps even retry > > indefinitely, as long as the user doesn't cancel migration). > > This causes a problem if your belief in the safe-state to migrate > doesn't happen on some new OS version, because then it will retry > indefinitely. You'd know you didn't need to wait on the newer machine > type, but you could still use this trick on the older machine type. > (Of course if you knew you were going to a new QEMU that could > always accept the new section then you wouldn't worry about this - > but people dont like sending qemu versions). Is retrying indefinitely a worse problem than failing migration, though? Anyway, this was just a thought. You could as well give up after a number of attempts and just increase that number from 1 currently to at least maybe 10 or so. (Potentially even configurable, with indefinitely being one option.) Kevin
* Kevin Wolf (kwolf@redhat.com) wrote: > Am 29.05.2015 um 11:38 hat Dr. David Alan Gilbert geschrieben: > > * Kevin Wolf (kwolf@redhat.com) wrote: > > > Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben: > > > > * Markus Armbruster (armbru@redhat.com) wrote: > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > > > > > > > > > > > * John Snow (jsnow@redhat.com) wrote: > > > > > >> > > > > > >> > > > > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: > > > > > >> > The floppy controller spec describes three different controller phases, > > > > > >> > which are currently not explicitly modelled in our emulation. Instead, > > > > > >> > each phase is represented by a combination of flags in registers. > > > > > >> > > > > > > >> > This patch makes explicit in which phase the controller currently is. > > > > > >> > > > > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > > > > > > > <snip> > > > > > > > > > >> > +static bool fdc_phase_needed(void *opaque) > > > > > >> > +{ > > > > > >> > + FDCtrl *fdctrl = opaque; > > > > > >> > + > > > > > >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; > > > > > >> > +} > > > > > > > > > > > > OK, that is one way of doing it which should work, as long > > > > > > as most of the time that matches. > > > > > > > > > > > > My preference for subsections is normally to make them just > > > > > > conditional on machine type, that way backwards migration just > > > > > > works. However, if the result of the backwards migration would > > > > > > be data corruption (which if I understand what you saying it could > > > > > > in this case) then it's arguably better to fail migration in those > > > > > > cases. > > > > > > > > > > This isn't a matter of preference. > > > > > > > > > > Subsections were designed to solves a problem we only have because we're > > > > > not perfect: device model bugs, namely forgetting to migrate a bit of > > > > > state, or failing to model it in the first place. > > > > > > > > > > Subsections tied to machine types solve an entirely different problem: > > > > > the old version of the device is just fine, including migration, but we > > > > > now want a new and improved variant, which needs some more state > > > > > migrated. Putting that piece of state into a subsection tied to the new > > > > > machine type avoids code duplication. > > > > > > > > > > But what do you do for device model bugs serious enough to need fixing > > > > > even for existing machine types, when the fix needs additional state > > > > > migrated? Subsections tied to machine types are *not* a solution there. > > > > > That's why this isn't about preference! It's about having a bug to fix > > > > > or not. > > > > > > > > My problem is that people keep adding subsections to fix minor device > > > > bugs because they think breaking migration is irrelevant. If the bug > > > > being fixed causes data corruption then OK I agree it is probably better > > > > to break migration, otherwise you need to make a decision about whether > > > > fixing the device bug during migration is better or worse than having > > > > the migration fail. > > > > Some people say 'Migration failure is ok - people just try it again'; > > > > sorry, no, that's useless in environments where the VM has 100s of GB > > > > of RAM and takes hours to migrate only for it then to fail, and it > > > > was urgent that it was migrated off that source host. > > > > > > If this is a problem in practice, it sounds a lot like we need to > > > improve our handling of that situation in general. Why do we abort > > > the whole migration when serialising the state fails? Can't we abort > > > just the completion and switch back to the live state, and then retry a > > > bit later? > > > > It's not the serialisation that fails, it's the deserialisation on the destination, > > and our migration stream is currently one way, so we have no way > > of signalling back to the source to 'just try that again'. > > So the migration fails, the source carries on running and we can only > > try again from the beginning. > > True, forgot about that. > > So what if the management tool queried the destination for supported > subsections and passed that as an (optional) argument to the migration > command on the source? > > Then the source would have full control and could complete migration > only when no unsupported subsection are needed. I tried doing something similar, where I had an optional parameter on the source which was the version of the destination qemu; people really hated that. > > > Most, if not all, of the subsections you mentioned that fix minor bugs > > > fix some states while the device is actively used and shorty after we > > > should be back in a state where the subsection isn't needed. > > > > I think once any (non-iterative) device state has been sent we can't > > necessarily abandon and try again; I'm not confident that the devices > > would work if we sent one set of state and then tried to reload it later. > > Why would serialising the device state change any device state? Don't > you already continue running the source after sending non-iterative > device state when migration fails on the destination? It's the destination I'm worried about here, not the source; lets say you have two devices, a & b. 'a' gets serialised, but then 'b' finds it has to wait, so we return to running the source and sending pages across. However the destination has already loaded the 'a' device state; so that when we serialise again we send a new 'a' device state'; I'm worrying here that the destination 'a' state loader would get upset trying to load the same state twice without somehow resetting it. > > I think that would need a further pass that went around all the devices and > > said 'is it OK to migrate now' and that would happen before any of the > > non-iterative devices were migrated. > > Yes, another pass is an option, too. > > > > This is exactly the "fails 1 in 100 migrations" cases you talked about, > > > and they would be practically fixed if you made a few more attempts to > > > complete the migration before you let it fail (or perhaps even retry > > > indefinitely, as long as the user doesn't cancel migration). > > > > This causes a problem if your belief in the safe-state to migrate > > doesn't happen on some new OS version, because then it will retry > > indefinitely. You'd know you didn't need to wait on the newer machine > > type, but you could still use this trick on the older machine type. > > (Of course if you knew you were going to a new QEMU that could > > always accept the new section then you wouldn't worry about this - > > but people dont like sending qemu versions). > > Is retrying indefinitely a worse problem than failing migration, though? > Anyway, this was just a thought. You could as well give up after a > number of attempts and just increase that number from 1 currently to > at least maybe 10 or so. (Potentially even configurable, with > indefinitely being one option.) No, it's probably better than failing the migration, but what I'm worrying about is, as in your code, where the destination has the new version of qemu and could cope with the new subsections, so there is no need to wait. Dave > > Kevin -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 29 May 2015 at 11:34, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > It's the destination I'm worried about here, not the source; lets say > you have two devices, a & b. 'a' gets serialised, but then 'b' finds > it has to wait, so we return to running the source and sending pages > across. However the destination has already loaded the 'a' device state; > so that when we serialise again we send a new 'a' device state'; I'm > worrying here that the destination 'a' state loader would get upset > trying to load the same state twice without somehow resetting it. You would need to reset the system before resending device state, I think. Device implementations rely on knowing that migration happens into a freshly reset device, and we don't have any way of resetting a single device, only the whole system at once. -- PMM
* Peter Maydell (peter.maydell@linaro.org) wrote: > On 29 May 2015 at 11:34, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > > It's the destination I'm worried about here, not the source; lets say > > you have two devices, a & b. 'a' gets serialised, but then 'b' finds > > it has to wait, so we return to running the source and sending pages > > across. However the destination has already loaded the 'a' device state; > > so that when we serialise again we send a new 'a' device state'; I'm > > worrying here that the destination 'a' state loader would get upset > > trying to load the same state twice without somehow resetting it. > > You would need to reset the system before resending device state, > I think. Device implementations rely on knowing that migration happens > into a freshly reset device, and we don't have any way of resetting > a single device, only the whole system at once. I don't think you can do a device (or certainly not a system) reset without resending the contents of RAM which was the whole point of not restarting the migration from the beginning. Dave > > -- PMM -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Am 29.05.2015 um 12:34 hat Dr. David Alan Gilbert geschrieben: > * Kevin Wolf (kwolf@redhat.com) wrote: > > Am 29.05.2015 um 11:38 hat Dr. David Alan Gilbert geschrieben: > > > * Kevin Wolf (kwolf@redhat.com) wrote: > > > > Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben: > > > > > * Markus Armbruster (armbru@redhat.com) wrote: > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > > > > > > > > > > > > > * John Snow (jsnow@redhat.com) wrote: > > > > > > >> > > > > > > >> > > > > > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: > > > > > > >> > The floppy controller spec describes three different controller phases, > > > > > > >> > which are currently not explicitly modelled in our emulation. Instead, > > > > > > >> > each phase is represented by a combination of flags in registers. > > > > > > >> > > > > > > > >> > This patch makes explicit in which phase the controller currently is. > > > > > > >> > > > > > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > > > > > > > > > <snip> > > > > > > > > > > > >> > +static bool fdc_phase_needed(void *opaque) > > > > > > >> > +{ > > > > > > >> > + FDCtrl *fdctrl = opaque; > > > > > > >> > + > > > > > > >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; > > > > > > >> > +} > > > > > > > > > > > > > > OK, that is one way of doing it which should work, as long > > > > > > > as most of the time that matches. > > > > > > > > > > > > > > My preference for subsections is normally to make them just > > > > > > > conditional on machine type, that way backwards migration just > > > > > > > works. However, if the result of the backwards migration would > > > > > > > be data corruption (which if I understand what you saying it could > > > > > > > in this case) then it's arguably better to fail migration in those > > > > > > > cases. > > > > > > > > > > > > This isn't a matter of preference. > > > > > > > > > > > > Subsections were designed to solves a problem we only have because we're > > > > > > not perfect: device model bugs, namely forgetting to migrate a bit of > > > > > > state, or failing to model it in the first place. > > > > > > > > > > > > Subsections tied to machine types solve an entirely different problem: > > > > > > the old version of the device is just fine, including migration, but we > > > > > > now want a new and improved variant, which needs some more state > > > > > > migrated. Putting that piece of state into a subsection tied to the new > > > > > > machine type avoids code duplication. > > > > > > > > > > > > But what do you do for device model bugs serious enough to need fixing > > > > > > even for existing machine types, when the fix needs additional state > > > > > > migrated? Subsections tied to machine types are *not* a solution there. > > > > > > That's why this isn't about preference! It's about having a bug to fix > > > > > > or not. > > > > > > > > > > My problem is that people keep adding subsections to fix minor device > > > > > bugs because they think breaking migration is irrelevant. If the bug > > > > > being fixed causes data corruption then OK I agree it is probably better > > > > > to break migration, otherwise you need to make a decision about whether > > > > > fixing the device bug during migration is better or worse than having > > > > > the migration fail. > > > > > Some people say 'Migration failure is ok - people just try it again'; > > > > > sorry, no, that's useless in environments where the VM has 100s of GB > > > > > of RAM and takes hours to migrate only for it then to fail, and it > > > > > was urgent that it was migrated off that source host. > > > > > > > > If this is a problem in practice, it sounds a lot like we need to > > > > improve our handling of that situation in general. Why do we abort > > > > the whole migration when serialising the state fails? Can't we abort > > > > just the completion and switch back to the live state, and then retry a > > > > bit later? > > > > > > It's not the serialisation that fails, it's the deserialisation on the destination, > > > and our migration stream is currently one way, so we have no way > > > of signalling back to the source to 'just try that again'. > > > So the migration fails, the source carries on running and we can only > > > try again from the beginning. > > > > True, forgot about that. > > > > So what if the management tool queried the destination for supported > > subsections and passed that as an (optional) argument to the migration > > command on the source? > > > > Then the source would have full control and could complete migration > > only when no unsupported subsection are needed. > > I tried doing something similar, where I had an optional parameter on > the source which was the version of the destination qemu; people really > hated that. Did they hate it because version numbers are really badly suited for the task (I would agree with that) or already because the source gets some information about the destination? In the mailing list thread that I could find quicky, only Paolo seemed to object, and the two arguments that I see is that the version number isn't a good criterion and that it might not be worth the effort for a corner case that isn't practically relevant. Apparently you think that it is in fact practically relevant, so the only point that remains is replacing the version number by something better. The name and version ID of all supported sections and subsections should be the definite source for qemu to tell whether it can migrate to the destination now, possibly can migrate later, or can't migrate at all (if the version ID on the destination is too small). > > > > Most, if not all, of the subsections you mentioned that fix minor bugs > > > > fix some states while the device is actively used and shorty after we > > > > should be back in a state where the subsection isn't needed. > > > > > > I think once any (non-iterative) device state has been sent we can't > > > necessarily abandon and try again; I'm not confident that the devices > > > would work if we sent one set of state and then tried to reload it later. > > > > Why would serialising the device state change any device state? Don't > > you already continue running the source after sending non-iterative > > device state when migration fails on the destination? > > It's the destination I'm worried about here, not the source; lets say > you have two devices, a & b. 'a' gets serialised, but then 'b' finds > it has to wait, so we return to running the source and sending pages > across. However the destination has already loaded the 'a' device state; > so that when we serialise again we send a new 'a' device state'; I'm > worrying here that the destination 'a' state loader would get upset > trying to load the same state twice without somehow resetting it. If that were the case, it would be a bug that needs fixing. For pure VMState fields, it's obviously not a problem. Anything with pre/post_load handlers could in theory have bugs, though. > > > I think that would need a further pass that went around all the devices and > > > said 'is it OK to migrate now' and that would happen before any of the > > > non-iterative devices were migrated. > > > > Yes, another pass is an option, too. So if you're concerned about it, and we don't want to audit the pre/post_load handlers, just do this second pass. It makes sense anyway to fail as soon as possible and not send lots of data before we fail. > > > > This is exactly the "fails 1 in 100 migrations" cases you talked about, > > > > and they would be practically fixed if you made a few more attempts to > > > > complete the migration before you let it fail (or perhaps even retry > > > > indefinitely, as long as the user doesn't cancel migration). > > > > > > This causes a problem if your belief in the safe-state to migrate > > > doesn't happen on some new OS version, because then it will retry > > > indefinitely. You'd know you didn't need to wait on the newer machine > > > type, but you could still use this trick on the older machine type. > > > (Of course if you knew you were going to a new QEMU that could > > > always accept the new section then you wouldn't worry about this - > > > but people dont like sending qemu versions). > > > > Is retrying indefinitely a worse problem than failing migration, though? > > Anyway, this was just a thought. You could as well give up after a > > number of attempts and just increase that number from 1 currently to > > at least maybe 10 or so. (Potentially even configurable, with > > indefinitely being one option.) > > No, it's probably better than failing the migration, but what I'm worrying > about is, as in your code, where the destination has the new version > of qemu and could cope with the new subsections, so there is no need > to wait. Waiting obviously only makes sense when you know what subsections the destination supports. Kevin
"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > * Markus Armbruster (armbru@redhat.com) wrote: >> "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: >> >> > * John Snow (jsnow@redhat.com) wrote: >> >> >> >> >> >> On 05/21/2015 09:19 AM, Kevin Wolf wrote: >> >> > The floppy controller spec describes three different controller phases, >> >> > which are currently not explicitly modelled in our emulation. Instead, >> >> > each phase is represented by a combination of flags in registers. >> >> > >> >> > This patch makes explicit in which phase the controller currently is. >> >> > >> >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > <snip> > >> >> > +static bool fdc_phase_needed(void *opaque) >> >> > +{ >> >> > + FDCtrl *fdctrl = opaque; >> >> > + >> >> > + return reconstruct_phase(fdctrl) != fdctrl->phase; >> >> > +} >> > >> > OK, that is one way of doing it which should work, as long >> > as most of the time that matches. >> > >> > My preference for subsections is normally to make them just >> > conditional on machine type, that way backwards migration just >> > works. However, if the result of the backwards migration would >> > be data corruption (which if I understand what you saying it could >> > in this case) then it's arguably better to fail migration in those >> > cases. >> >> This isn't a matter of preference. >> >> Subsections were designed to solves a problem we only have because we're >> not perfect: device model bugs, namely forgetting to migrate a bit of >> state, or failing to model it in the first place. >> >> Subsections tied to machine types solve an entirely different problem: >> the old version of the device is just fine, including migration, but we >> now want a new and improved variant, which needs some more state >> migrated. Putting that piece of state into a subsection tied to the new >> machine type avoids code duplication. >> >> But what do you do for device model bugs serious enough to need fixing >> even for existing machine types, when the fix needs additional state >> migrated? Subsections tied to machine types are *not* a solution there. >> That's why this isn't about preference! It's about having a bug to fix >> or not. > > My problem is that people keep adding subsections to fix minor device > bugs because they think breaking migration is irrelevant. If the bug > being fixed causes data corruption then OK I agree it is probably better > to break migration, otherwise you need to make a decision about whether > fixing the device bug during migration is better or worse than having > the migration fail. > Some people say 'Migration failure is ok - people just try it again'; > sorry, no, that's useless in environments where the VM has 100s of GB > of RAM and takes hours to migrate only for it then to fail, and it > was urgent that it was migrated off that source host. > >> You seem rather reluctant to use subsections for their intended purpose. >> I don't understand; their correctness argument is really simple. > > Correct, I am; it encourages people to break migration compatibility > too strongly. > > Now, since I have to worry about backwards-migration downstream > this influences my choice. People keep telling me that upstream > doesn't care about backwards migration compatibility, but I've not > seen any evidence of that, If we cared about backwards migration compatibility, we'd test it. In my opinion, we do care about infrastructure for making backwards migration possible, but that's about it. > indeed I've seen other people trying > to put backwards migration fixes in. Doesn't quite rise to a level I'd call "care". >> Fundamentally, we're picking a pair of state serialization and >> deserialization functions (in the mathematical sense). >> >> The obvious encoding function for a state consisting of pieces is to >> encode all the pieces one by one. Call this e(), and its decoding >> function d(). >> >> Here's an alternative pair e'() and d'(): >> >> * Pick a piece of the state. >> >> * Pick one of its values. Let's call it "the prevalent value" for >> reasons that will become apparent shortly. >> >> * Encode just like e() does, except omit this piece of state if and only >> if it has its prevalent value. >> >> * In the decoding function, default this piece of state to its prevalent >> value. >> >> The encodings are equivalent: d(e(x)) = d'(e'(x)) for all x. >> >> At this point, you might want to tell me not to waste your time with >> trivial math. If so, good! The triviality is exactly my point. > > Well I was going to mention it, but since you already have I wont bother.... > Actually, the problem here is that the symbols look trivial but they're > meaningless without the following textual description, and that's > where the problems are. > >> Straightforwardly translated to code, e() and d() put everything in the >> section. Drawback: add/remove/modify of state kills migration to and >> from older versions. >> >> e'() and d'() put a piece of state in a subsection that is only present >> when the piece doesn't have its prevalent value. >> >> Because the encodings are equivalent, forward migration transfers >> exactly the same state regardless of which of them we choose. >> >> So why not choose one that makes backward migration work exactly when >> it's safe? >> >> * Put all pieces of state the old version doesn't understand in >> subsections. >> >> * Identify prevalent state. >> >> * Double-check the old version behaves safely in prevalent state. >> >> * In prevalent state, no subsections are sent, and migration succeeds. >> >> * In any other state, migration fails. >> >> Naturally, this is useful only when the prevalent state is actually >> prevalent. > > The 'prevalent state' idea is what I hate about the use of subsections. > > It's very very difficult to reason about: > 1) subtle changes in either the guest OS or the implementation can > change what the 'prevalent state' is. > 2) Are you sure that the prevalent state is the same on Windows n+1? > 3) What about during boot? > 4) What about during installation? > > It's then roulette as to whether a migration will work. > It's also a nightmare to test, having bugs where migration fails 1/100 > times on some random OS in a production environment is horrible, and > that's exactly what the 'prevalent state' design produces. Thanks for explaining your thinking, I believe I understand it better now. The rationale behind the "prevalent state" design is that failing migration outright is better than letting it silently corrupt state. I still think that makes some sense. However: > I work to the rule that: > a) If the migration bug is such that the effect of the missing state > is really bad; a hung guest, or data corruption - then sure > use a subsection based on a 'prevalent state' decision. > > b) Else tie it to the machine type. You're right in that state corruption isn't necessarily catastrophic. When its impact is minor, availability (backward migration succeeds) *can* be preferable to correctness (state is exactly preserved under backward and forward migration). In other words: I agree with you there's a tradeoff to be made. > c) A combination is even better; i.e. always send the state > on new machine types, just because it makes the behaviour > more consistent. I don't like (c), because it adds a second "needed" predicate. Granted, the new one's as trivial as can be, but X + trivial is still more complicated than just X. [...]
"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes: > * Peter Maydell (peter.maydell@linaro.org) wrote: >> On 29 May 2015 at 11:34, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: >> > It's the destination I'm worried about here, not the source; lets say >> > you have two devices, a & b. 'a' gets serialised, but then 'b' finds >> > it has to wait, so we return to running the source and sending pages >> > across. However the destination has already loaded the 'a' device state; >> > so that when we serialise again we send a new 'a' device state'; I'm >> > worrying here that the destination 'a' state loader would get upset >> > trying to load the same state twice without somehow resetting it. >> >> You would need to reset the system before resending device state, >> I think. Device implementations rely on knowing that migration happens >> into a freshly reset device, and we don't have any way of resetting >> a single device, only the whole system at once. > > I don't think you can do a device (or certainly not a system) reset > without resending the contents of RAM which was the whole point of > not restarting the migration from the beginning. What about resetting all the devices, but keeping contents of RAM, then send only newly dirtied RAM plus complete device state?
diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 8c41434..f5bcf0b 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -495,6 +495,33 @@ enum { FD_DIR_DSKCHG = 0x80, }; +/* + * See chapter 5.0 "Controller phases" of the spec: + * + * Command phase: + * The host writes a command and its parameters into the FIFO. The command + * phase is completed when all parameters for the command have been supplied, + * and execution phase is entered. + * + * Execution phase: + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO + * contains the payload now, otherwise it's unused. When all bytes of the + * required data have been transferred, the state is switched to either result + * phase (if the command produces status bytes) or directly back into the + * command phase for the next command. + * + * Result phase: + * The host reads out the FIFO, which contains one or more result bytes now. + */ +enum { + /* Only for migration: reconstruct phase from registers like qemu 2.3 */ + FD_PHASE_RECONSTRUCT = 0, + + FD_PHASE_COMMAND = 1, + FD_PHASE_EXECUTION = 2, + FD_PHASE_RESULT = 3, +}; + #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) @@ -504,6 +531,7 @@ struct FDCtrl { /* Controller state */ QEMUTimer *result_timer; int dma_chann; + uint8_t phase; /* Controller's identification */ uint8_t version; /* HW */ @@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = { } }; +/* + * Reconstructs the phase from register values according to the logic that was + * implemented in qemu 2.3. This is the default value that is used if the phase + * subsection is not present on migration. + * + * Don't change this function to reflect newer qemu versions, it is part of + * the migration ABI. + */ +static int reconstruct_phase(FDCtrl *fdctrl) +{ + if (fdctrl->msr & FD_MSR_NONDMA) { + return FD_PHASE_EXECUTION; + } else if ((fdctrl->msr & FD_MSR_RQM) == 0) { + /* qemu 2.3 disabled RQM only during DMA transfers */ + return FD_PHASE_EXECUTION; + } else if (fdctrl->msr & FD_MSR_DIO) { + return FD_PHASE_RESULT; + } else { + return FD_PHASE_COMMAND; + } +} + static void fdc_pre_save(void *opaque) { FDCtrl *s = opaque; @@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque) s->dor_vmstate = s->dor | GET_CUR_DRV(s); } +static int fdc_pre_load(void *opaque) +{ + FDCtrl *s = opaque; + s->phase = FD_PHASE_RECONSTRUCT; + return 0; +} + static int fdc_post_load(void *opaque, int version_id) { FDCtrl *s = opaque; SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK); s->dor = s->dor_vmstate & ~FD_DOR_SELMASK; + + if (s->phase == FD_PHASE_RECONSTRUCT) { + s->phase = reconstruct_phase(s); + } + return 0; } @@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = { } }; +static bool fdc_phase_needed(void *opaque) +{ + FDCtrl *fdctrl = opaque; + + return reconstruct_phase(fdctrl) != fdctrl->phase; +} + +static const VMStateDescription vmstate_fdc_phase = { + .name = "fdc/phase", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(phase, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_fdc = { .name = "fdc", .version_id = 2, .minimum_version_id = 2, .pre_save = fdc_pre_save, + .pre_load = fdc_pre_load, .post_load = fdc_post_load, .fields = (VMStateField[]) { /* Controller State */ @@ -839,6 +919,9 @@ static const VMStateDescription vmstate_fdc = { .vmsd = &vmstate_fdc_result_timer, .needed = fdc_result_timer_needed, } , { + .vmsd = &vmstate_fdc_phase, + .needed = fdc_phase_needed, + } , { /* empty */ } } @@ -1137,6 +1220,7 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl) /* Clear the FIFO and update the state for receiving the next command */ static void fdctrl_to_command_phase(FDCtrl *fdctrl) { + fdctrl->phase = FD_PHASE_COMMAND; fdctrl->data_dir = FD_DIR_WRITE; fdctrl->data_pos = 0; fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO); @@ -1146,6 +1230,7 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl) * @fifo_len is the number of result bytes to be read out. */ static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len) { + fdctrl->phase = FD_PHASE_RESULT; fdctrl->data_dir = FD_DIR_READ; fdctrl->data_len = fifo_len; fdctrl->data_pos = 0; @@ -1912,6 +1997,9 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction) fdctrl_raise_irq(fdctrl); } +/* + * Handlers for the execution phase of each command + */ static const struct { uint8_t value; uint8_t mask; @@ -2015,6 +2103,7 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) /* We now have all parameters * and will be able to treat the command */ + fdctrl->phase = FD_PHASE_EXECUTION; if (fdctrl->data_state & FD_STATE_FORMAT) { fdctrl_format_sector(fdctrl); return;
The floppy controller spec describes three different controller phases, which are currently not explicitly modelled in our emulation. Instead, each phase is represented by a combination of flags in registers. This patch makes explicit in which phase the controller currently is. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+)