diff mbox

[RFC] Introduce vm_stop_permanent()

Message ID 20110727154229.656532d4@doriath
State New
Headers show

Commit Message

Luiz Capitulino July 27, 2011, 6:42 p.m. UTC
This function should be used when the VM is not supposed to resume
execution (eg. by issuing 'cont' monitor command).

Today, we allow the user to resume execution even when:

 o the guest shuts down and -no-shutdown is used
 o there's a kvm internal error
 o loading the VM state with -loadvm or "loadvm" in the monitor fails

I think only badness can happen from the cases above.

Another possible candidate is migration, that's, when the VM is
automatically stopped following a successful migration.

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
---
 cpus.c    |    7 +++++++
 kvm-all.c |    2 +-
 monitor.c |    9 +++++++--
 qerror.c  |    4 ++++
 qerror.h  |    3 +++
 sysemu.h  |    2 ++
 vl.c      |    4 +++-
 7 files changed, 27 insertions(+), 4 deletions(-)

Comments

Blue Swirl July 27, 2011, 9:44 p.m. UTC | #1
On Wed, Jul 27, 2011 at 9:42 PM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> This function should be used when the VM is not supposed to resume
> execution (eg. by issuing 'cont' monitor command).
>
> Today, we allow the user to resume execution even when:
>
>  o the guest shuts down and -no-shutdown is used
>  o there's a kvm internal error
>  o loading the VM state with -loadvm or "loadvm" in the monitor fails
>
> I think only badness can happen from the cases above.

I'd suppose a system_reset should bring the system back to sanity and
then clear vm_permanent_stopped (where's -ly?) except maybe for KVM
internal error if that can't be recovered. Then it would not very
permanent anymore, so the name would need adjusting.

>
> Another possible candidate is migration, that's, when the VM is
> automatically stopped following a successful migration.
>
> Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
> ---
>  cpus.c    |    7 +++++++
>  kvm-all.c |    2 +-
>  monitor.c |    9 +++++++--
>  qerror.c  |    4 ++++
>  qerror.h  |    3 +++
>  sysemu.h  |    2 ++
>  vl.c      |    4 +++-
>  7 files changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 6bf4e3f..5a4a480 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -535,6 +535,13 @@ static void qemu_tcg_init_cpu_signals(void)
>  }
>  #endif /* _WIN32 */
>
> +void vm_stop_permanent(int reason)
> +{
> +    vm_stop(reason);
> +    assert(!vm_permanent_stopped);
> +    vm_permanent_stopped = 1;
> +}
> +
>  #ifndef CONFIG_IOTHREAD
>  int qemu_init_main_loop(void)
>  {
> diff --git a/kvm-all.c b/kvm-all.c
> index cbc2532..4d0ceb3 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1014,7 +1014,7 @@ int kvm_cpu_exec(CPUState *env)
>
>     if (ret < 0) {
>         cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
> -        vm_stop(VMSTOP_PANIC);
> +        vm_stop_permanent(VMSTOP_PANIC);
>     }
>
>     env->exit_request = 0;
> diff --git a/monitor.c b/monitor.c
> index 718935b..86e146c 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -1307,7 +1307,10 @@ static int do_cont(Monitor *mon, const QDict *qdict, QObject **ret_data)
>  {
>     struct bdrv_iterate_context context = { mon, 0 };
>
> -    if (incoming_expected) {
> +    if (vm_permanent_stopped) {
> +        qerror_report(QERR_VM_PERMANENT_STOPPED);
> +        return -1;
> +    } else if (incoming_expected) {
>         qerror_report(QERR_MIGRATION_EXPECTED);
>         return -1;
>     }
> @@ -2814,7 +2817,9 @@ static void do_loadvm(Monitor *mon, const QDict *qdict)
>
>     vm_stop(VMSTOP_LOADVM);
>
> -    if (load_vmstate(name) == 0 && saved_vm_running) {
> +    if (load_vmstate(name) < 0) {
> +        vm_permanent_stopped = 1;
> +    } else if (saved_vm_running) {
>         vm_start();
>     }
>  }
> diff --git a/qerror.c b/qerror.c
> index 69c1bc9..c91f763 100644
> --- a/qerror.c
> +++ b/qerror.c
> @@ -219,6 +219,10 @@ static const QErrorStringTable qerror_table[] = {
>                      "supported by this qemu version: %(feature)",
>     },
>     {
> +        .error_fmt = QERR_VM_PERMANENT_STOPPED,
> +        .desc      = "The Virtual Machine is permanently stopped",
> +    },
> +    {
>         .error_fmt = QERR_VNC_SERVER_FAILED,
>         .desc      = "Could not start VNC server on %(target)",
>     },
> diff --git a/qerror.h b/qerror.h
> index 8058456..c759172 100644
> --- a/qerror.h
> +++ b/qerror.h
> @@ -181,6 +181,9 @@ QError *qobject_to_qerror(const QObject *obj);
>  #define QERR_UNKNOWN_BLOCK_FORMAT_FEATURE \
>     "{ 'class': 'UnknownBlockFormatFeature', 'data': { 'device': %s, 'format': %s, 'feature': %s } }"
>
> +#define QERR_VM_PERMANENT_STOPPED \
> +    "{ 'class': 'PermanentStopped', 'data': {} }"
> +
>  #define QERR_VNC_SERVER_FAILED \
>     "{ 'class': 'VNCServerFailed', 'data': { 'target': %s } }"
>
> diff --git a/sysemu.h b/sysemu.h
> index d3013f5..af90fa3 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -12,6 +12,7 @@
>  extern const char *bios_name;
>
>  extern int vm_running;
> +extern int vm_permanent_stopped;
>  extern const char *qemu_name;
>  extern uint8_t qemu_uuid[];
>  int qemu_uuid_parse(const char *str, uint8_t *uuid);
> @@ -39,6 +40,7 @@ void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
>
>  void vm_start(void);
>  void vm_stop(int reason);
> +void vm_stop_permanent(int reason);
>
>  void qemu_system_reset_request(void);
>  void qemu_system_shutdown_request(void);
> diff --git a/vl.c b/vl.c
> index 4b6688b..96403ac 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -186,6 +186,7 @@ NICInfo nd_table[MAX_NICS];
>  int vm_running;
>  int autostart;
>  int incoming_expected; /* Started with -incoming and waiting for incoming */
> +int vm_permanent_stopped = 0; /* qemu will never resume if set */
>  static int rtc_utc = 1;
>  static int rtc_date_offset = -1; /* -1 means no change */
>  QEMUClock *rtc_clock;
> @@ -1397,7 +1398,7 @@ static void main_loop(void)
>             qemu_kill_report();
>             monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
>             if (no_shutdown) {
> -                vm_stop(VMSTOP_SHUTDOWN);
> +                vm_stop_permanent(VMSTOP_SHUTDOWN);
>             } else
>                 break;
>         }
> @@ -3315,6 +3316,7 @@ int main(int argc, char **argv, char **envp)
>     qemu_system_reset(VMRESET_SILENT);
>     if (loadvm) {
>         if (load_vmstate(loadvm) < 0) {
> +            vm_permanent_stopped = 1;
>             autostart = 0;
>         }
>     }
> --
> 1.7.6.347.g4db0d
>
>
>
Avi Kivity July 28, 2011, 7:23 a.m. UTC | #2
On 07/28/2011 12:44 AM, Blue Swirl wrote:
> On Wed, Jul 27, 2011 at 9:42 PM, Luiz Capitulino<lcapitulino@redhat.com>  wrote:
> >  This function should be used when the VM is not supposed to resume
> >  execution (eg. by issuing 'cont' monitor command).
> >
> >  Today, we allow the user to resume execution even when:
> >
> >    o the guest shuts down and -no-shutdown is used
> >    o there's a kvm internal error
> >    o loading the VM state with -loadvm or "loadvm" in the monitor fails
> >
> >  I think only badness can happen from the cases above.
>
> I'd suppose a system_reset should bring the system back to sanity and
> then clear vm_permanent_stopped (where's -ly?) except maybe for KVM
> internal error if that can't be recovered. Then it would not very
> permanent anymore, so the name would need adjusting.

Currently, all kvm internal errors are recoverable by reset (and 
possibly by fiddling with memory/registers).
Luiz Capitulino July 28, 2011, 1:31 p.m. UTC | #3
On Thu, 28 Jul 2011 10:23:22 +0300
Avi Kivity <avi@redhat.com> wrote:

> On 07/28/2011 12:44 AM, Blue Swirl wrote:
> > On Wed, Jul 27, 2011 at 9:42 PM, Luiz Capitulino<lcapitulino@redhat.com>  wrote:
> > >  This function should be used when the VM is not supposed to resume
> > >  execution (eg. by issuing 'cont' monitor command).
> > >
> > >  Today, we allow the user to resume execution even when:
> > >
> > >    o the guest shuts down and -no-shutdown is used
> > >    o there's a kvm internal error
> > >    o loading the VM state with -loadvm or "loadvm" in the monitor fails
> > >
> > >  I think only badness can happen from the cases above.
> >
> > I'd suppose a system_reset should bring the system back to sanity and
> > then clear vm_permanent_stopped (where's -ly?)

What's -ly?

> > except maybe for KVM
> > internal error if that can't be recovered. Then it would not very
> > permanent anymore, so the name would need adjusting.
> 
> Currently, all kvm internal errors are recoverable by reset (and 
> possibly by fiddling with memory/registers).

Ok, but a poweroff in the guest isn't recoverable with system_reset
right? Or does it depend on the guest?

I get funny results if qemu is started with -no-shutdown and I run cont after
a poweroff in a F15 guest. Sometimes qemu will exit after a few seconds,
sometimes 'info status' will say 'running'.
Avi Kivity July 28, 2011, 1:37 p.m. UTC | #4
On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> On Thu, 28 Jul 2011 10:23:22 +0300
> Avi Kivity<avi@redhat.com>  wrote:
>
> >  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> >  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz Capitulino<lcapitulino@redhat.com>   wrote:
> >  >  >   This function should be used when the VM is not supposed to resume
> >  >  >   execution (eg. by issuing 'cont' monitor command).
> >  >  >
> >  >  >   Today, we allow the user to resume execution even when:
> >  >  >
> >  >  >     o the guest shuts down and -no-shutdown is used
> >  >  >     o there's a kvm internal error
> >  >  >     o loading the VM state with -loadvm or "loadvm" in the monitor fails
> >  >  >
> >  >  >   I think only badness can happen from the cases above.
> >  >
> >  >  I'd suppose a system_reset should bring the system back to sanity and
> >  >  then clear vm_permanent_stopped (where's -ly?)
>
> What's -ly?
>

permanent-ly.

> >  >  except maybe for KVM
> >  >  internal error if that can't be recovered. Then it would not very
> >  >  permanent anymore, so the name would need adjusting.
> >
> >  Currently, all kvm internal errors are recoverable by reset (and
> >  possibly by fiddling with memory/registers).
>
> Ok, but a poweroff in the guest isn't recoverable with system_reset
> right? Or does it depend on the guest?

Right, it's not recoverable if you shut the power down where the tractor 
beam is coupled to the main reactor.

> I get funny results if qemu is started with -no-shutdown and I run cont after
> a poweroff in a F15 guest. Sometimes qemu will exit after a few seconds,
> sometimes 'info status' will say 'running'.

Yes, best fixed.
Jan Kiszka July 28, 2011, 2:19 p.m. UTC | #5
On 2011-07-28 15:37, Avi Kivity wrote:
> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
>> On Thu, 28 Jul 2011 10:23:22 +0300
>> Avi Kivity<avi@redhat.com>  wrote:
>>
>> >  On 07/28/2011 12:44 AM, Blue Swirl wrote:
>> >  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
>> Capitulino<lcapitulino@redhat.com>   wrote:
>> >  >  >   This function should be used when the VM is not supposed to
>> resume
>> >  >  >   execution (eg. by issuing 'cont' monitor command).
>> >  >  >
>> >  >  >   Today, we allow the user to resume execution even when:
>> >  >  >
>> >  >  >     o the guest shuts down and -no-shutdown is used
>> >  >  >     o there's a kvm internal error
>> >  >  >     o loading the VM state with -loadvm or "loadvm" in the
>> monitor fails
>> >  >  >
>> >  >  >   I think only badness can happen from the cases above.
>> >  >
>> >  >  I'd suppose a system_reset should bring the system back to
>> sanity and
>> >  >  then clear vm_permanent_stopped (where's -ly?)
>>
>> What's -ly?
>>
> 
> permanent-ly.
> 
>> >  >  except maybe for KVM
>> >  >  internal error if that can't be recovered. Then it would not very
>> >  >  permanent anymore, so the name would need adjusting.
>> >
>> >  Currently, all kvm internal errors are recoverable by reset (and
>> >  possibly by fiddling with memory/registers).
>>
>> Ok, but a poweroff in the guest isn't recoverable with system_reset
>> right? Or does it depend on the guest?
> 
> Right, it's not recoverable if you shut the power down where the tractor
> beam is coupled to the main reactor.

system_reset will bring all emulated devices back into their power-on
state - unless we have remaining bugs to fix. Actually, one may consider
issuing this reset automatically on vm_start after "permant" vm_stop.

Jan
Luiz Capitulino July 28, 2011, 3:18 p.m. UTC | #6
On Thu, 28 Jul 2011 16:19:19 +0200
Jan Kiszka <jan.kiszka@web.de> wrote:

> On 2011-07-28 15:37, Avi Kivity wrote:
> > On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> >> On Thu, 28 Jul 2011 10:23:22 +0300
> >> Avi Kivity<avi@redhat.com>  wrote:
> >>
> >> >  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> >> >  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
> >> Capitulino<lcapitulino@redhat.com>   wrote:
> >> >  >  >   This function should be used when the VM is not supposed to
> >> resume
> >> >  >  >   execution (eg. by issuing 'cont' monitor command).
> >> >  >  >
> >> >  >  >   Today, we allow the user to resume execution even when:
> >> >  >  >
> >> >  >  >     o the guest shuts down and -no-shutdown is used
> >> >  >  >     o there's a kvm internal error
> >> >  >  >     o loading the VM state with -loadvm or "loadvm" in the
> >> monitor fails
> >> >  >  >
> >> >  >  >   I think only badness can happen from the cases above.
> >> >  >
> >> >  >  I'd suppose a system_reset should bring the system back to
> >> sanity and
> >> >  >  then clear vm_permanent_stopped (where's -ly?)
> >>
> >> What's -ly?
> >>
> > 
> > permanent-ly.
> > 
> >> >  >  except maybe for KVM
> >> >  >  internal error if that can't be recovered. Then it would not very
> >> >  >  permanent anymore, so the name would need adjusting.
> >> >
> >> >  Currently, all kvm internal errors are recoverable by reset (and
> >> >  possibly by fiddling with memory/registers).
> >>
> >> Ok, but a poweroff in the guest isn't recoverable with system_reset
> >> right? Or does it depend on the guest?
> > 
> > Right, it's not recoverable if you shut the power down where the tractor
> > beam is coupled to the main reactor.
> 
> system_reset will bring all emulated devices back into their power-on
> state - unless we have remaining bugs to fix. Actually, one may consider
> issuing this reset automatically on vm_start after "permant" vm_stop.

I'm going to abandon the vm_stop_permanent() idea.

I'm thinking something towards having a stop_kind argument to vm_stop(),
which could be temporary (which is what we have today), only with
system_reset or permanent.

Or maybe vm_stop_opt(), which allows for fine grained control of stop
(vm_stop() would default to today's behavior)...
Jan Kiszka July 28, 2011, 3:20 p.m. UTC | #7
On 2011-07-28 17:18, Luiz Capitulino wrote:
> On Thu, 28 Jul 2011 16:19:19 +0200
> Jan Kiszka <jan.kiszka@web.de> wrote:
> 
>> On 2011-07-28 15:37, Avi Kivity wrote:
>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
>>>> On Thu, 28 Jul 2011 10:23:22 +0300
>>>> Avi Kivity<avi@redhat.com>  wrote:
>>>>
>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
>>>> Capitulino<lcapitulino@redhat.com>   wrote:
>>>>>  >  >   This function should be used when the VM is not supposed to
>>>> resume
>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
>>>>>  >  >
>>>>>  >  >   Today, we allow the user to resume execution even when:
>>>>>  >  >
>>>>>  >  >     o the guest shuts down and -no-shutdown is used
>>>>>  >  >     o there's a kvm internal error
>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
>>>> monitor fails
>>>>>  >  >
>>>>>  >  >   I think only badness can happen from the cases above.
>>>>>  >
>>>>>  >  I'd suppose a system_reset should bring the system back to
>>>> sanity and
>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
>>>>
>>>> What's -ly?
>>>>
>>>
>>> permanent-ly.
>>>
>>>>>  >  except maybe for KVM
>>>>>  >  internal error if that can't be recovered. Then it would not very
>>>>>  >  permanent anymore, so the name would need adjusting.
>>>>>
>>>>>  Currently, all kvm internal errors are recoverable by reset (and
>>>>>  possibly by fiddling with memory/registers).
>>>>
>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
>>>> right? Or does it depend on the guest?
>>>
>>> Right, it's not recoverable if you shut the power down where the tractor
>>> beam is coupled to the main reactor.
>>
>> system_reset will bring all emulated devices back into their power-on
>> state - unless we have remaining bugs to fix. Actually, one may consider
>> issuing this reset automatically on vm_start after "permant" vm_stop.
> 
> I'm going to abandon the vm_stop_permanent() idea.
> 
> I'm thinking something towards having a stop_kind argument to vm_stop(),
> which could be temporary (which is what we have today), only with
> system_reset or permanent.
> 
> Or maybe vm_stop_opt(), which allows for fine grained control of stop
> (vm_stop() would default to today's behavior)...

But the right point for reset is start, not stop. Otherwise we loose the
state before the stop.

Jam
Luiz Capitulino July 28, 2011, 5:39 p.m. UTC | #8
On Thu, 28 Jul 2011 17:20:41 +0200
Jan Kiszka <jan.kiszka@web.de> wrote:

> On 2011-07-28 17:18, Luiz Capitulino wrote:
> > On Thu, 28 Jul 2011 16:19:19 +0200
> > Jan Kiszka <jan.kiszka@web.de> wrote:
> > 
> >> On 2011-07-28 15:37, Avi Kivity wrote:
> >>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> >>>> On Thu, 28 Jul 2011 10:23:22 +0300
> >>>> Avi Kivity<avi@redhat.com>  wrote:
> >>>>
> >>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> >>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
> >>>> Capitulino<lcapitulino@redhat.com>   wrote:
> >>>>>  >  >   This function should be used when the VM is not supposed to
> >>>> resume
> >>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
> >>>>>  >  >
> >>>>>  >  >   Today, we allow the user to resume execution even when:
> >>>>>  >  >
> >>>>>  >  >     o the guest shuts down and -no-shutdown is used
> >>>>>  >  >     o there's a kvm internal error
> >>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
> >>>> monitor fails
> >>>>>  >  >
> >>>>>  >  >   I think only badness can happen from the cases above.
> >>>>>  >
> >>>>>  >  I'd suppose a system_reset should bring the system back to
> >>>> sanity and
> >>>>>  >  then clear vm_permanent_stopped (where's -ly?)
> >>>>
> >>>> What's -ly?
> >>>>
> >>>
> >>> permanent-ly.
> >>>
> >>>>>  >  except maybe for KVM
> >>>>>  >  internal error if that can't be recovered. Then it would not very
> >>>>>  >  permanent anymore, so the name would need adjusting.
> >>>>>
> >>>>>  Currently, all kvm internal errors are recoverable by reset (and
> >>>>>  possibly by fiddling with memory/registers).
> >>>>
> >>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
> >>>> right? Or does it depend on the guest?
> >>>
> >>> Right, it's not recoverable if you shut the power down where the tractor
> >>> beam is coupled to the main reactor.
> >>
> >> system_reset will bring all emulated devices back into their power-on
> >> state - unless we have remaining bugs to fix. Actually, one may consider
> >> issuing this reset automatically on vm_start after "permant" vm_stop.

The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.

Are you saying that system_reset should be able to recover from that too?

If yes, then I guess the right place is do_cont(). But I'm not sure we
should do that (if -no-shutdown is not used, we'll exit() anyway).

> > 
> > I'm going to abandon the vm_stop_permanent() idea.
> > 
> > I'm thinking something towards having a stop_kind argument to vm_stop(),
> > which could be temporary (which is what we have today), only with
> > system_reset or permanent.
> > 
> > Or maybe vm_stop_opt(), which allows for fine grained control of stop
> > (vm_stop() would default to today's behavior)...
> 
> But the right point for reset is start, not stop. Otherwise we loose the
> state before the stop.
Luiz Capitulino July 28, 2011, 5:48 p.m. UTC | #9
On Thu, 28 Jul 2011 14:39:23 -0300
Luiz Capitulino <lcapitulino@redhat.com> wrote:

> On Thu, 28 Jul 2011 17:20:41 +0200
> Jan Kiszka <jan.kiszka@web.de> wrote:
> 
> > On 2011-07-28 17:18, Luiz Capitulino wrote:
> > > On Thu, 28 Jul 2011 16:19:19 +0200
> > > Jan Kiszka <jan.kiszka@web.de> wrote:
> > > 
> > >> On 2011-07-28 15:37, Avi Kivity wrote:
> > >>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> > >>>> On Thu, 28 Jul 2011 10:23:22 +0300
> > >>>> Avi Kivity<avi@redhat.com>  wrote:
> > >>>>
> > >>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> > >>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
> > >>>> Capitulino<lcapitulino@redhat.com>   wrote:
> > >>>>>  >  >   This function should be used when the VM is not supposed to
> > >>>> resume
> > >>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
> > >>>>>  >  >
> > >>>>>  >  >   Today, we allow the user to resume execution even when:
> > >>>>>  >  >
> > >>>>>  >  >     o the guest shuts down and -no-shutdown is used
> > >>>>>  >  >     o there's a kvm internal error
> > >>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
> > >>>> monitor fails
> > >>>>>  >  >
> > >>>>>  >  >   I think only badness can happen from the cases above.
> > >>>>>  >
> > >>>>>  >  I'd suppose a system_reset should bring the system back to
> > >>>> sanity and
> > >>>>>  >  then clear vm_permanent_stopped (where's -ly?)
> > >>>>
> > >>>> What's -ly?
> > >>>>
> > >>>
> > >>> permanent-ly.
> > >>>
> > >>>>>  >  except maybe for KVM
> > >>>>>  >  internal error if that can't be recovered. Then it would not very
> > >>>>>  >  permanent anymore, so the name would need adjusting.
> > >>>>>
> > >>>>>  Currently, all kvm internal errors are recoverable by reset (and
> > >>>>>  possibly by fiddling with memory/registers).
> > >>>>
> > >>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
> > >>>> right? Or does it depend on the guest?
> > >>>
> > >>> Right, it's not recoverable if you shut the power down where the tractor
> > >>> beam is coupled to the main reactor.
> > >>
> > >> system_reset will bring all emulated devices back into their power-on
> > >> state - unless we have remaining bugs to fix. Actually, one may consider
> > >> issuing this reset automatically on vm_start after "permant" vm_stop.
> 
> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
> 
> Are you saying that system_reset should be able to recover from that too?

It already does, so we don't have permanent stops.

> 
> If yes, then I guess the right place is do_cont(). But I'm not sure we
> should do that (if -no-shutdown is not used, we'll exit() anyway).
> 
> > > 
> > > I'm going to abandon the vm_stop_permanent() idea.
> > > 
> > > I'm thinking something towards having a stop_kind argument to vm_stop(),
> > > which could be temporary (which is what we have today), only with
> > > system_reset or permanent.
> > > 
> > > Or maybe vm_stop_opt(), which allows for fine grained control of stop
> > > (vm_stop() would default to today's behavior)...
> > 
> > But the right point for reset is start, not stop. Otherwise we loose the
> > state before the stop.
>
Jan Kiszka July 28, 2011, 5:52 p.m. UTC | #10
On 2011-07-28 19:48, Luiz Capitulino wrote:
> On Thu, 28 Jul 2011 14:39:23 -0300
> Luiz Capitulino <lcapitulino@redhat.com> wrote:
> 
>> On Thu, 28 Jul 2011 17:20:41 +0200
>> Jan Kiszka <jan.kiszka@web.de> wrote:
>>
>>> On 2011-07-28 17:18, Luiz Capitulino wrote:
>>>> On Thu, 28 Jul 2011 16:19:19 +0200
>>>> Jan Kiszka <jan.kiszka@web.de> wrote:
>>>>
>>>>> On 2011-07-28 15:37, Avi Kivity wrote:
>>>>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
>>>>>>> On Thu, 28 Jul 2011 10:23:22 +0300
>>>>>>> Avi Kivity<avi@redhat.com>  wrote:
>>>>>>>
>>>>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
>>>>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
>>>>>>> Capitulino<lcapitulino@redhat.com>   wrote:
>>>>>>>>  >  >   This function should be used when the VM is not supposed to
>>>>>>> resume
>>>>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
>>>>>>>>  >  >
>>>>>>>>  >  >   Today, we allow the user to resume execution even when:
>>>>>>>>  >  >
>>>>>>>>  >  >     o the guest shuts down and -no-shutdown is used
>>>>>>>>  >  >     o there's a kvm internal error
>>>>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
>>>>>>> monitor fails
>>>>>>>>  >  >
>>>>>>>>  >  >   I think only badness can happen from the cases above.
>>>>>>>>  >
>>>>>>>>  >  I'd suppose a system_reset should bring the system back to
>>>>>>> sanity and
>>>>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
>>>>>>>
>>>>>>> What's -ly?
>>>>>>>
>>>>>>
>>>>>> permanent-ly.
>>>>>>
>>>>>>>>  >  except maybe for KVM
>>>>>>>>  >  internal error if that can't be recovered. Then it would not very
>>>>>>>>  >  permanent anymore, so the name would need adjusting.
>>>>>>>>
>>>>>>>>  Currently, all kvm internal errors are recoverable by reset (and
>>>>>>>>  possibly by fiddling with memory/registers).
>>>>>>>
>>>>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
>>>>>>> right? Or does it depend on the guest?
>>>>>>
>>>>>> Right, it's not recoverable if you shut the power down where the tractor
>>>>>> beam is coupled to the main reactor.
>>>>>
>>>>> system_reset will bring all emulated devices back into their power-on
>>>>> state - unless we have remaining bugs to fix. Actually, one may consider
>>>>> issuing this reset automatically on vm_start after "permant" vm_stop.
>>
>> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
>>
>> Are you saying that system_reset should be able to recover from that too?
> 
> It already does, so we don't have permanent stops.

Exactly. We just have stops over inconsistent states that require a
reset to continue with anything useful.

Jan
Luiz Capitulino July 28, 2011, 6 p.m. UTC | #11
On Thu, 28 Jul 2011 19:52:31 +0200
Jan Kiszka <jan.kiszka@web.de> wrote:

> On 2011-07-28 19:48, Luiz Capitulino wrote:
> > On Thu, 28 Jul 2011 14:39:23 -0300
> > Luiz Capitulino <lcapitulino@redhat.com> wrote:
> > 
> >> On Thu, 28 Jul 2011 17:20:41 +0200
> >> Jan Kiszka <jan.kiszka@web.de> wrote:
> >>
> >>> On 2011-07-28 17:18, Luiz Capitulino wrote:
> >>>> On Thu, 28 Jul 2011 16:19:19 +0200
> >>>> Jan Kiszka <jan.kiszka@web.de> wrote:
> >>>>
> >>>>> On 2011-07-28 15:37, Avi Kivity wrote:
> >>>>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> >>>>>>> On Thu, 28 Jul 2011 10:23:22 +0300
> >>>>>>> Avi Kivity<avi@redhat.com>  wrote:
> >>>>>>>
> >>>>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> >>>>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
> >>>>>>> Capitulino<lcapitulino@redhat.com>   wrote:
> >>>>>>>>  >  >   This function should be used when the VM is not supposed to
> >>>>>>> resume
> >>>>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
> >>>>>>>>  >  >
> >>>>>>>>  >  >   Today, we allow the user to resume execution even when:
> >>>>>>>>  >  >
> >>>>>>>>  >  >     o the guest shuts down and -no-shutdown is used
> >>>>>>>>  >  >     o there's a kvm internal error
> >>>>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
> >>>>>>> monitor fails
> >>>>>>>>  >  >
> >>>>>>>>  >  >   I think only badness can happen from the cases above.
> >>>>>>>>  >
> >>>>>>>>  >  I'd suppose a system_reset should bring the system back to
> >>>>>>> sanity and
> >>>>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
> >>>>>>>
> >>>>>>> What's -ly?
> >>>>>>>
> >>>>>>
> >>>>>> permanent-ly.
> >>>>>>
> >>>>>>>>  >  except maybe for KVM
> >>>>>>>>  >  internal error if that can't be recovered. Then it would not very
> >>>>>>>>  >  permanent anymore, so the name would need adjusting.
> >>>>>>>>
> >>>>>>>>  Currently, all kvm internal errors are recoverable by reset (and
> >>>>>>>>  possibly by fiddling with memory/registers).
> >>>>>>>
> >>>>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
> >>>>>>> right? Or does it depend on the guest?
> >>>>>>
> >>>>>> Right, it's not recoverable if you shut the power down where the tractor
> >>>>>> beam is coupled to the main reactor.
> >>>>>
> >>>>> system_reset will bring all emulated devices back into their power-on
> >>>>> state - unless we have remaining bugs to fix. Actually, one may consider
> >>>>> issuing this reset automatically on vm_start after "permant" vm_stop.
> >>
> >> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
> >>
> >> Are you saying that system_reset should be able to recover from that too?
> > 
> > It already does, so we don't have permanent stops.
> 
> Exactly. We just have stops over inconsistent states that require a
> reset to continue with anything useful.

Yes. If I got you right, you suggest that we do the reset automatically.

I think it's better to let the user do it, because s/he might want to
do something else before resetting. For example, for the kvm error the
user might want to save the vm state.

For the poweroff case with -no-shutdown it's probably fine, but I don't
want to hard code special cases. It's better and easier to treat them all
as "require system_reset to recover".
Jan Kiszka July 28, 2011, 6:04 p.m. UTC | #12
On 2011-07-28 20:00, Luiz Capitulino wrote:
> On Thu, 28 Jul 2011 19:52:31 +0200
> Jan Kiszka <jan.kiszka@web.de> wrote:
> 
>> On 2011-07-28 19:48, Luiz Capitulino wrote:
>>> On Thu, 28 Jul 2011 14:39:23 -0300
>>> Luiz Capitulino <lcapitulino@redhat.com> wrote:
>>>
>>>> On Thu, 28 Jul 2011 17:20:41 +0200
>>>> Jan Kiszka <jan.kiszka@web.de> wrote:
>>>>
>>>>> On 2011-07-28 17:18, Luiz Capitulino wrote:
>>>>>> On Thu, 28 Jul 2011 16:19:19 +0200
>>>>>> Jan Kiszka <jan.kiszka@web.de> wrote:
>>>>>>
>>>>>>> On 2011-07-28 15:37, Avi Kivity wrote:
>>>>>>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
>>>>>>>>> On Thu, 28 Jul 2011 10:23:22 +0300
>>>>>>>>> Avi Kivity<avi@redhat.com>  wrote:
>>>>>>>>>
>>>>>>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
>>>>>>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
>>>>>>>>> Capitulino<lcapitulino@redhat.com>   wrote:
>>>>>>>>>>  >  >   This function should be used when the VM is not supposed to
>>>>>>>>> resume
>>>>>>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
>>>>>>>>>>  >  >
>>>>>>>>>>  >  >   Today, we allow the user to resume execution even when:
>>>>>>>>>>  >  >
>>>>>>>>>>  >  >     o the guest shuts down and -no-shutdown is used
>>>>>>>>>>  >  >     o there's a kvm internal error
>>>>>>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
>>>>>>>>> monitor fails
>>>>>>>>>>  >  >
>>>>>>>>>>  >  >   I think only badness can happen from the cases above.
>>>>>>>>>>  >
>>>>>>>>>>  >  I'd suppose a system_reset should bring the system back to
>>>>>>>>> sanity and
>>>>>>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
>>>>>>>>>
>>>>>>>>> What's -ly?
>>>>>>>>>
>>>>>>>>
>>>>>>>> permanent-ly.
>>>>>>>>
>>>>>>>>>>  >  except maybe for KVM
>>>>>>>>>>  >  internal error if that can't be recovered. Then it would not very
>>>>>>>>>>  >  permanent anymore, so the name would need adjusting.
>>>>>>>>>>
>>>>>>>>>>  Currently, all kvm internal errors are recoverable by reset (and
>>>>>>>>>>  possibly by fiddling with memory/registers).
>>>>>>>>>
>>>>>>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
>>>>>>>>> right? Or does it depend on the guest?
>>>>>>>>
>>>>>>>> Right, it's not recoverable if you shut the power down where the tractor
>>>>>>>> beam is coupled to the main reactor.
>>>>>>>
>>>>>>> system_reset will bring all emulated devices back into their power-on
>>>>>>> state - unless we have remaining bugs to fix. Actually, one may consider
>>>>>>> issuing this reset automatically on vm_start after "permant" vm_stop.
>>>>
>>>> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
>>>>
>>>> Are you saying that system_reset should be able to recover from that too?
>>>
>>> It already does, so we don't have permanent stops.
>>
>> Exactly. We just have stops over inconsistent states that require a
>> reset to continue with anything useful.
> 
> Yes. If I got you right, you suggest that we do the reset automatically.
> 
> I think it's better to let the user do it, because s/he might want to
> do something else before resetting. For example, for the kvm error the
> user might want to save the vm state.

Associating the reset with a cont means requesting an explicit action
from the user. I'm not suggesting to do the reset when the stop state is
entered.

> 
> For the poweroff case with -no-shutdown it's probably fine, but I don't
> want to hard code special cases. It's better and easier to treat them all
> as "require system_reset to recover".

In any case, we need to tag the current state as stopped-and-invalid or
so vs. a normal stop. That remains a valuable first step. How to deal
with that information is the second one.

Jan
Luiz Capitulino July 28, 2011, 6:22 p.m. UTC | #13
On Thu, 28 Jul 2011 20:04:58 +0200
Jan Kiszka <jan.kiszka@web.de> wrote:

> On 2011-07-28 20:00, Luiz Capitulino wrote:
> > On Thu, 28 Jul 2011 19:52:31 +0200
> > Jan Kiszka <jan.kiszka@web.de> wrote:
> > 
> >> On 2011-07-28 19:48, Luiz Capitulino wrote:
> >>> On Thu, 28 Jul 2011 14:39:23 -0300
> >>> Luiz Capitulino <lcapitulino@redhat.com> wrote:
> >>>
> >>>> On Thu, 28 Jul 2011 17:20:41 +0200
> >>>> Jan Kiszka <jan.kiszka@web.de> wrote:
> >>>>
> >>>>> On 2011-07-28 17:18, Luiz Capitulino wrote:
> >>>>>> On Thu, 28 Jul 2011 16:19:19 +0200
> >>>>>> Jan Kiszka <jan.kiszka@web.de> wrote:
> >>>>>>
> >>>>>>> On 2011-07-28 15:37, Avi Kivity wrote:
> >>>>>>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> >>>>>>>>> On Thu, 28 Jul 2011 10:23:22 +0300
> >>>>>>>>> Avi Kivity<avi@redhat.com>  wrote:
> >>>>>>>>>
> >>>>>>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> >>>>>>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
> >>>>>>>>> Capitulino<lcapitulino@redhat.com>   wrote:
> >>>>>>>>>>  >  >   This function should be used when the VM is not supposed to
> >>>>>>>>> resume
> >>>>>>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
> >>>>>>>>>>  >  >
> >>>>>>>>>>  >  >   Today, we allow the user to resume execution even when:
> >>>>>>>>>>  >  >
> >>>>>>>>>>  >  >     o the guest shuts down and -no-shutdown is used
> >>>>>>>>>>  >  >     o there's a kvm internal error
> >>>>>>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
> >>>>>>>>> monitor fails
> >>>>>>>>>>  >  >
> >>>>>>>>>>  >  >   I think only badness can happen from the cases above.
> >>>>>>>>>>  >
> >>>>>>>>>>  >  I'd suppose a system_reset should bring the system back to
> >>>>>>>>> sanity and
> >>>>>>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
> >>>>>>>>>
> >>>>>>>>> What's -ly?
> >>>>>>>>>
> >>>>>>>>
> >>>>>>>> permanent-ly.
> >>>>>>>>
> >>>>>>>>>>  >  except maybe for KVM
> >>>>>>>>>>  >  internal error if that can't be recovered. Then it would not very
> >>>>>>>>>>  >  permanent anymore, so the name would need adjusting.
> >>>>>>>>>>
> >>>>>>>>>>  Currently, all kvm internal errors are recoverable by reset (and
> >>>>>>>>>>  possibly by fiddling with memory/registers).
> >>>>>>>>>
> >>>>>>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
> >>>>>>>>> right? Or does it depend on the guest?
> >>>>>>>>
> >>>>>>>> Right, it's not recoverable if you shut the power down where the tractor
> >>>>>>>> beam is coupled to the main reactor.
> >>>>>>>
> >>>>>>> system_reset will bring all emulated devices back into their power-on
> >>>>>>> state - unless we have remaining bugs to fix. Actually, one may consider
> >>>>>>> issuing this reset automatically on vm_start after "permant" vm_stop.
> >>>>
> >>>> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
> >>>>
> >>>> Are you saying that system_reset should be able to recover from that too?
> >>>
> >>> It already does, so we don't have permanent stops.
> >>
> >> Exactly. We just have stops over inconsistent states that require a
> >> reset to continue with anything useful.
> > 
> > Yes. If I got you right, you suggest that we do the reset automatically.
> > 
> > I think it's better to let the user do it, because s/he might want to
> > do something else before resetting. For example, for the kvm error the
> > user might want to save the vm state.
> 
> Associating the reset with a cont means requesting an explicit action
> from the user. I'm not suggesting to do the reset when the stop state is
> entered.

I see. But automatically resetting on cont might be unexpected to the
user, even on a bad state.

Another option would be to add a force option to cont, where the reset is
done when the state is invalid (otherwise cont will return an error).

I still prefer to let the user do it manually though.

> > For the poweroff case with -no-shutdown it's probably fine, but I don't
> > want to hard code special cases. It's better and easier to treat them all
> > as "require system_reset to recover".
> 
> In any case, we need to tag the current state as stopped-and-invalid or
> so vs. a normal stop. That remains a valuable first step. How to deal
> with that information is the second one.
Alon Levy July 28, 2011, 6:28 p.m. UTC | #14
On Thu, Jul 28, 2011 at 03:22:52PM -0300, Luiz Capitulino wrote:
> On Thu, 28 Jul 2011 20:04:58 +0200
> Jan Kiszka <jan.kiszka@web.de> wrote:
> 
> > On 2011-07-28 20:00, Luiz Capitulino wrote:
> > > On Thu, 28 Jul 2011 19:52:31 +0200
> > > Jan Kiszka <jan.kiszka@web.de> wrote:
> > > 
> > >> On 2011-07-28 19:48, Luiz Capitulino wrote:
> > >>> On Thu, 28 Jul 2011 14:39:23 -0300
> > >>> Luiz Capitulino <lcapitulino@redhat.com> wrote:
> > >>>
> > >>>> On Thu, 28 Jul 2011 17:20:41 +0200
> > >>>> Jan Kiszka <jan.kiszka@web.de> wrote:
> > >>>>
> > >>>>> On 2011-07-28 17:18, Luiz Capitulino wrote:
> > >>>>>> On Thu, 28 Jul 2011 16:19:19 +0200
> > >>>>>> Jan Kiszka <jan.kiszka@web.de> wrote:
> > >>>>>>
> > >>>>>>> On 2011-07-28 15:37, Avi Kivity wrote:
> > >>>>>>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> > >>>>>>>>> On Thu, 28 Jul 2011 10:23:22 +0300
> > >>>>>>>>> Avi Kivity<avi@redhat.com>  wrote:
> > >>>>>>>>>
> > >>>>>>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> > >>>>>>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
> > >>>>>>>>> Capitulino<lcapitulino@redhat.com>   wrote:
> > >>>>>>>>>>  >  >   This function should be used when the VM is not supposed to
> > >>>>>>>>> resume
> > >>>>>>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
> > >>>>>>>>>>  >  >
> > >>>>>>>>>>  >  >   Today, we allow the user to resume execution even when:
> > >>>>>>>>>>  >  >
> > >>>>>>>>>>  >  >     o the guest shuts down and -no-shutdown is used
> > >>>>>>>>>>  >  >     o there's a kvm internal error
> > >>>>>>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
> > >>>>>>>>> monitor fails
> > >>>>>>>>>>  >  >
> > >>>>>>>>>>  >  >   I think only badness can happen from the cases above.
> > >>>>>>>>>>  >
> > >>>>>>>>>>  >  I'd suppose a system_reset should bring the system back to
> > >>>>>>>>> sanity and
> > >>>>>>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
> > >>>>>>>>>
> > >>>>>>>>> What's -ly?
> > >>>>>>>>>
> > >>>>>>>>
> > >>>>>>>> permanent-ly.
> > >>>>>>>>
> > >>>>>>>>>>  >  except maybe for KVM
> > >>>>>>>>>>  >  internal error if that can't be recovered. Then it would not very
> > >>>>>>>>>>  >  permanent anymore, so the name would need adjusting.
> > >>>>>>>>>>
> > >>>>>>>>>>  Currently, all kvm internal errors are recoverable by reset (and
> > >>>>>>>>>>  possibly by fiddling with memory/registers).
> > >>>>>>>>>
> > >>>>>>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
> > >>>>>>>>> right? Or does it depend on the guest?
> > >>>>>>>>
> > >>>>>>>> Right, it's not recoverable if you shut the power down where the tractor
> > >>>>>>>> beam is coupled to the main reactor.
> > >>>>>>>
> > >>>>>>> system_reset will bring all emulated devices back into their power-on
> > >>>>>>> state - unless we have remaining bugs to fix. Actually, one may consider
> > >>>>>>> issuing this reset automatically on vm_start after "permant" vm_stop.
> > >>>>
> > >>>> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
> > >>>>
> > >>>> Are you saying that system_reset should be able to recover from that too?
> > >>>
> > >>> It already does, so we don't have permanent stops.
> > >>
> > >> Exactly. We just have stops over inconsistent states that require a
> > >> reset to continue with anything useful.
> > > 
> > > Yes. If I got you right, you suggest that we do the reset automatically.
> > > 
> > > I think it's better to let the user do it, because s/he might want to
> > > do something else before resetting. For example, for the kvm error the
> > > user might want to save the vm state.
> > 
> > Associating the reset with a cont means requesting an explicit action
> > from the user. I'm not suggesting to do the reset when the stop state is
> > entered.
> 
> I see. But automatically resetting on cont might be unexpected to the
> user, even on a bad state.

I use this feature a lot in testing, i.e. that there is no automatic reset
when I run a vm with -no-shutdown and I do a guest initiated shutdown.
Very convenient when running with snapshot - great time to do commit ide0.
I have a patch I send a month or so ago to make sure you can do this endlessly -
currently after the first shutdown the -no-shutdown set flag is reset.

> 
> Another option would be to add a force option to cont, where the reset is
> done when the state is invalid (otherwise cont will return an error).
> 
> I still prefer to let the user do it manually though.
> 
> > > For the poweroff case with -no-shutdown it's probably fine, but I don't
> > > want to hard code special cases. It's better and easier to treat them all
> > > as "require system_reset to recover".
> > 
> > In any case, we need to tag the current state as stopped-and-invalid or
> > so vs. a normal stop. That remains a valuable first step. How to deal
> > with that information is the second one.
>
Blue Swirl July 30, 2011, 7:41 a.m. UTC | #15
On Thu, Jul 28, 2011 at 9:22 PM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> On Thu, 28 Jul 2011 20:04:58 +0200
> Jan Kiszka <jan.kiszka@web.de> wrote:
>
>> On 2011-07-28 20:00, Luiz Capitulino wrote:
>> > On Thu, 28 Jul 2011 19:52:31 +0200
>> > Jan Kiszka <jan.kiszka@web.de> wrote:
>> >
>> >> On 2011-07-28 19:48, Luiz Capitulino wrote:
>> >>> On Thu, 28 Jul 2011 14:39:23 -0300
>> >>> Luiz Capitulino <lcapitulino@redhat.com> wrote:
>> >>>
>> >>>> On Thu, 28 Jul 2011 17:20:41 +0200
>> >>>> Jan Kiszka <jan.kiszka@web.de> wrote:
>> >>>>
>> >>>>> On 2011-07-28 17:18, Luiz Capitulino wrote:
>> >>>>>> On Thu, 28 Jul 2011 16:19:19 +0200
>> >>>>>> Jan Kiszka <jan.kiszka@web.de> wrote:
>> >>>>>>
>> >>>>>>> On 2011-07-28 15:37, Avi Kivity wrote:
>> >>>>>>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
>> >>>>>>>>> On Thu, 28 Jul 2011 10:23:22 +0300
>> >>>>>>>>> Avi Kivity<avi@redhat.com>  wrote:
>> >>>>>>>>>
>> >>>>>>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
>> >>>>>>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
>> >>>>>>>>> Capitulino<lcapitulino@redhat.com>   wrote:
>> >>>>>>>>>>  >  >   This function should be used when the VM is not supposed to
>> >>>>>>>>> resume
>> >>>>>>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
>> >>>>>>>>>>  >  >
>> >>>>>>>>>>  >  >   Today, we allow the user to resume execution even when:
>> >>>>>>>>>>  >  >
>> >>>>>>>>>>  >  >     o the guest shuts down and -no-shutdown is used
>> >>>>>>>>>>  >  >     o there's a kvm internal error
>> >>>>>>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
>> >>>>>>>>> monitor fails
>> >>>>>>>>>>  >  >
>> >>>>>>>>>>  >  >   I think only badness can happen from the cases above.
>> >>>>>>>>>>  >
>> >>>>>>>>>>  >  I'd suppose a system_reset should bring the system back to
>> >>>>>>>>> sanity and
>> >>>>>>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
>> >>>>>>>>>
>> >>>>>>>>> What's -ly?
>> >>>>>>>>>
>> >>>>>>>>
>> >>>>>>>> permanent-ly.
>> >>>>>>>>
>> >>>>>>>>>>  >  except maybe for KVM
>> >>>>>>>>>>  >  internal error if that can't be recovered. Then it would not very
>> >>>>>>>>>>  >  permanent anymore, so the name would need adjusting.
>> >>>>>>>>>>
>> >>>>>>>>>>  Currently, all kvm internal errors are recoverable by reset (and
>> >>>>>>>>>>  possibly by fiddling with memory/registers).
>> >>>>>>>>>
>> >>>>>>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
>> >>>>>>>>> right? Or does it depend on the guest?
>> >>>>>>>>
>> >>>>>>>> Right, it's not recoverable if you shut the power down where the tractor
>> >>>>>>>> beam is coupled to the main reactor.
>> >>>>>>>
>> >>>>>>> system_reset will bring all emulated devices back into their power-on
>> >>>>>>> state - unless we have remaining bugs to fix. Actually, one may consider
>> >>>>>>> issuing this reset automatically on vm_start after "permant" vm_stop.
>> >>>>
>> >>>> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
>> >>>>
>> >>>> Are you saying that system_reset should be able to recover from that too?
>> >>>
>> >>> It already does, so we don't have permanent stops.
>> >>
>> >> Exactly. We just have stops over inconsistent states that require a
>> >> reset to continue with anything useful.
>> >
>> > Yes. If I got you right, you suggest that we do the reset automatically.
>> >
>> > I think it's better to let the user do it, because s/he might want to
>> > do something else before resetting. For example, for the kvm error the
>> > user might want to save the vm state.
>>
>> Associating the reset with a cont means requesting an explicit action
>> from the user. I'm not suggesting to do the reset when the stop state is
>> entered.
>
> I see. But automatically resetting on cont might be unexpected to the
> user, even on a bad state.
>
> Another option would be to add a force option to cont, where the reset is
> done when the state is invalid (otherwise cont will return an error).
>
> I still prefer to let the user do it manually though.
>
>> > For the poweroff case with -no-shutdown it's probably fine, but I don't
>> > want to hard code special cases. It's better and easier to treat them all
>> > as "require system_reset to recover".
>>
>> In any case, we need to tag the current state as stopped-and-invalid or
>> so vs. a normal stop. That remains a valuable first step. How to deal
>> with that information is the second one.

I think the right way to fix this is to disable 'cont' until a
system_reset is issued. 'cont' should not perform reset but print an
error message about inconsistent state and suggest issuing a
'system_reset'.
Luiz Capitulino Aug. 1, 2011, 12:46 p.m. UTC | #16
On Sat, 30 Jul 2011 10:41:48 +0300
Blue Swirl <blauwirbel@gmail.com> wrote:

> On Thu, Jul 28, 2011 at 9:22 PM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> > On Thu, 28 Jul 2011 20:04:58 +0200
> > Jan Kiszka <jan.kiszka@web.de> wrote:
> >
> >> On 2011-07-28 20:00, Luiz Capitulino wrote:
> >> > On Thu, 28 Jul 2011 19:52:31 +0200
> >> > Jan Kiszka <jan.kiszka@web.de> wrote:
> >> >
> >> >> On 2011-07-28 19:48, Luiz Capitulino wrote:
> >> >>> On Thu, 28 Jul 2011 14:39:23 -0300
> >> >>> Luiz Capitulino <lcapitulino@redhat.com> wrote:
> >> >>>
> >> >>>> On Thu, 28 Jul 2011 17:20:41 +0200
> >> >>>> Jan Kiszka <jan.kiszka@web.de> wrote:
> >> >>>>
> >> >>>>> On 2011-07-28 17:18, Luiz Capitulino wrote:
> >> >>>>>> On Thu, 28 Jul 2011 16:19:19 +0200
> >> >>>>>> Jan Kiszka <jan.kiszka@web.de> wrote:
> >> >>>>>>
> >> >>>>>>> On 2011-07-28 15:37, Avi Kivity wrote:
> >> >>>>>>>> On 07/28/2011 04:31 PM, Luiz Capitulino wrote:
> >> >>>>>>>>> On Thu, 28 Jul 2011 10:23:22 +0300
> >> >>>>>>>>> Avi Kivity<avi@redhat.com>  wrote:
> >> >>>>>>>>>
> >> >>>>>>>>>>  On 07/28/2011 12:44 AM, Blue Swirl wrote:
> >> >>>>>>>>>>  >  On Wed, Jul 27, 2011 at 9:42 PM, Luiz
> >> >>>>>>>>> Capitulino<lcapitulino@redhat.com>   wrote:
> >> >>>>>>>>>>  >  >   This function should be used when the VM is not supposed to
> >> >>>>>>>>> resume
> >> >>>>>>>>>>  >  >   execution (eg. by issuing 'cont' monitor command).
> >> >>>>>>>>>>  >  >
> >> >>>>>>>>>>  >  >   Today, we allow the user to resume execution even when:
> >> >>>>>>>>>>  >  >
> >> >>>>>>>>>>  >  >     o the guest shuts down and -no-shutdown is used
> >> >>>>>>>>>>  >  >     o there's a kvm internal error
> >> >>>>>>>>>>  >  >     o loading the VM state with -loadvm or "loadvm" in the
> >> >>>>>>>>> monitor fails
> >> >>>>>>>>>>  >  >
> >> >>>>>>>>>>  >  >   I think only badness can happen from the cases above.
> >> >>>>>>>>>>  >
> >> >>>>>>>>>>  >  I'd suppose a system_reset should bring the system back to
> >> >>>>>>>>> sanity and
> >> >>>>>>>>>>  >  then clear vm_permanent_stopped (where's -ly?)
> >> >>>>>>>>>
> >> >>>>>>>>> What's -ly?
> >> >>>>>>>>>
> >> >>>>>>>>
> >> >>>>>>>> permanent-ly.
> >> >>>>>>>>
> >> >>>>>>>>>>  >  except maybe for KVM
> >> >>>>>>>>>>  >  internal error if that can't be recovered. Then it would not very
> >> >>>>>>>>>>  >  permanent anymore, so the name would need adjusting.
> >> >>>>>>>>>>
> >> >>>>>>>>>>  Currently, all kvm internal errors are recoverable by reset (and
> >> >>>>>>>>>>  possibly by fiddling with memory/registers).
> >> >>>>>>>>>
> >> >>>>>>>>> Ok, but a poweroff in the guest isn't recoverable with system_reset
> >> >>>>>>>>> right? Or does it depend on the guest?
> >> >>>>>>>>
> >> >>>>>>>> Right, it's not recoverable if you shut the power down where the tractor
> >> >>>>>>>> beam is coupled to the main reactor.
> >> >>>>>>>
> >> >>>>>>> system_reset will bring all emulated devices back into their power-on
> >> >>>>>>> state - unless we have remaining bugs to fix. Actually, one may consider
> >> >>>>>>> issuing this reset automatically on vm_start after "permant" vm_stop.
> >> >>>>
> >> >>>> The only permanent vm_stop we'd have is poweroff when -no-shutdown is used.
> >> >>>>
> >> >>>> Are you saying that system_reset should be able to recover from that too?
> >> >>>
> >> >>> It already does, so we don't have permanent stops.
> >> >>
> >> >> Exactly. We just have stops over inconsistent states that require a
> >> >> reset to continue with anything useful.
> >> >
> >> > Yes. If I got you right, you suggest that we do the reset automatically.
> >> >
> >> > I think it's better to let the user do it, because s/he might want to
> >> > do something else before resetting. For example, for the kvm error the
> >> > user might want to save the vm state.
> >>
> >> Associating the reset with a cont means requesting an explicit action
> >> from the user. I'm not suggesting to do the reset when the stop state is
> >> entered.
> >
> > I see. But automatically resetting on cont might be unexpected to the
> > user, even on a bad state.
> >
> > Another option would be to add a force option to cont, where the reset is
> > done when the state is invalid (otherwise cont will return an error).
> >
> > I still prefer to let the user do it manually though.
> >
> >> > For the poweroff case with -no-shutdown it's probably fine, but I don't
> >> > want to hard code special cases. It's better and easier to treat them all
> >> > as "require system_reset to recover".
> >>
> >> In any case, we need to tag the current state as stopped-and-invalid or
> >> so vs. a normal stop. That remains a valuable first step. How to deal
> >> with that information is the second one.
> 
> I think the right way to fix this is to disable 'cont' until a
> system_reset is issued. 'cont' should not perform reset but print an
> error message about inconsistent state and suggest issuing a
> 'system_reset'.

That's exactly what the series I'm working on does, although the error
message is not explicit about issuing a system_reset (will change).
Daniel P. Berrangé Aug. 2, 2011, 10:22 a.m. UTC | #17
On Thu, Jul 28, 2011 at 10:31:43AM -0300, Luiz Capitulino wrote:
> On Thu, 28 Jul 2011 10:23:22 +0300
> Avi Kivity <avi@redhat.com> wrote:
> 
> > On 07/28/2011 12:44 AM, Blue Swirl wrote:
> > > On Wed, Jul 27, 2011 at 9:42 PM, Luiz Capitulino<lcapitulino@redhat.com>  wrote:
> > > >  This function should be used when the VM is not supposed to resume
> > > >  execution (eg. by issuing 'cont' monitor command).
> > > >
> > > >  Today, we allow the user to resume execution even when:
> > > >
> > > >    o the guest shuts down and -no-shutdown is used
> > > >    o there's a kvm internal error
> > > >    o loading the VM state with -loadvm or "loadvm" in the monitor fails
> > > >
> > > >  I think only badness can happen from the cases above.
> > >
> > > I'd suppose a system_reset should bring the system back to sanity and
> > > then clear vm_permanent_stopped (where's -ly?)
> 
> What's -ly?
> 
> > > except maybe for KVM
> > > internal error if that can't be recovered. Then it would not very
> > > permanent anymore, so the name would need adjusting.
> > 
> > Currently, all kvm internal errors are recoverable by reset (and 
> > possibly by fiddling with memory/registers).
> 
> Ok, but a poweroff in the guest isn't recoverable with system_reset
> right? Or does it depend on the guest?
> 
> I get funny results if qemu is started with -no-shutdown and I run cont after
> a poweroff in a F15 guest. Sometimes qemu will exit after a few seconds,
> sometimes 'info status' will say 'running'.

libvirt uses this approach to fake a controlled soft reboot of guests.
eg

  $ qemu .... -no-shutdown
  ...some time later...
  system_shutdown
  ...wait for SHUTDOWN event...
  system_reset
  cont

Previous releases of QEMU had a bug in some device backends which would
cause a crash sometimes, but in general it ought to work IMHO.

Daniel
Luiz Capitulino Aug. 2, 2011, 7:07 p.m. UTC | #18
On Tue, 2 Aug 2011 11:22:15 +0100
"Daniel P. Berrange" <berrange@redhat.com> wrote:

> On Thu, Jul 28, 2011 at 10:31:43AM -0300, Luiz Capitulino wrote:
> > On Thu, 28 Jul 2011 10:23:22 +0300
> > Avi Kivity <avi@redhat.com> wrote:
> > 
> > > On 07/28/2011 12:44 AM, Blue Swirl wrote:
> > > > On Wed, Jul 27, 2011 at 9:42 PM, Luiz Capitulino<lcapitulino@redhat.com>  wrote:
> > > > >  This function should be used when the VM is not supposed to resume
> > > > >  execution (eg. by issuing 'cont' monitor command).
> > > > >
> > > > >  Today, we allow the user to resume execution even when:
> > > > >
> > > > >    o the guest shuts down and -no-shutdown is used
> > > > >    o there's a kvm internal error
> > > > >    o loading the VM state with -loadvm or "loadvm" in the monitor fails
> > > > >
> > > > >  I think only badness can happen from the cases above.
> > > >
> > > > I'd suppose a system_reset should bring the system back to sanity and
> > > > then clear vm_permanent_stopped (where's -ly?)
> > 
> > What's -ly?
> > 
> > > > except maybe for KVM
> > > > internal error if that can't be recovered. Then it would not very
> > > > permanent anymore, so the name would need adjusting.
> > > 
> > > Currently, all kvm internal errors are recoverable by reset (and 
> > > possibly by fiddling with memory/registers).
> > 
> > Ok, but a poweroff in the guest isn't recoverable with system_reset
> > right? Or does it depend on the guest?
> > 
> > I get funny results if qemu is started with -no-shutdown and I run cont after
> > a poweroff in a F15 guest. Sometimes qemu will exit after a few seconds,
> > sometimes 'info status' will say 'running'.
> 
> libvirt uses this approach to fake a controlled soft reboot of guests.
> eg
> 
>   $ qemu .... -no-shutdown
>   ...some time later...
>   system_shutdown
>   ...wait for SHUTDOWN event...
>   system_reset
>   cont
> 
> Previous releases of QEMU had a bug in some device backends which would
> cause a crash sometimes, but in general it ought to work IMHO.

Yes, you're doing a system_reset before cont, that's ok.

The problem happens when you issue cont w/o issuing system_reset first.
diff mbox

Patch

diff --git a/cpus.c b/cpus.c
index 6bf4e3f..5a4a480 100644
--- a/cpus.c
+++ b/cpus.c
@@ -535,6 +535,13 @@  static void qemu_tcg_init_cpu_signals(void)
 }
 #endif /* _WIN32 */
 
+void vm_stop_permanent(int reason)
+{
+    vm_stop(reason);
+    assert(!vm_permanent_stopped);
+    vm_permanent_stopped = 1;
+}
+
 #ifndef CONFIG_IOTHREAD
 int qemu_init_main_loop(void)
 {
diff --git a/kvm-all.c b/kvm-all.c
index cbc2532..4d0ceb3 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1014,7 +1014,7 @@  int kvm_cpu_exec(CPUState *env)
 
     if (ret < 0) {
         cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
-        vm_stop(VMSTOP_PANIC);
+        vm_stop_permanent(VMSTOP_PANIC);
     }
 
     env->exit_request = 0;
diff --git a/monitor.c b/monitor.c
index 718935b..86e146c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1307,7 +1307,10 @@  static int do_cont(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
     struct bdrv_iterate_context context = { mon, 0 };
 
-    if (incoming_expected) {
+    if (vm_permanent_stopped) {
+        qerror_report(QERR_VM_PERMANENT_STOPPED);
+        return -1;
+    } else if (incoming_expected) {
         qerror_report(QERR_MIGRATION_EXPECTED);
         return -1;
     }
@@ -2814,7 +2817,9 @@  static void do_loadvm(Monitor *mon, const QDict *qdict)
 
     vm_stop(VMSTOP_LOADVM);
 
-    if (load_vmstate(name) == 0 && saved_vm_running) {
+    if (load_vmstate(name) < 0) {
+        vm_permanent_stopped = 1;
+    } else if (saved_vm_running) {
         vm_start();
     }
 }
diff --git a/qerror.c b/qerror.c
index 69c1bc9..c91f763 100644
--- a/qerror.c
+++ b/qerror.c
@@ -219,6 +219,10 @@  static const QErrorStringTable qerror_table[] = {
                      "supported by this qemu version: %(feature)",
     },
     {
+        .error_fmt = QERR_VM_PERMANENT_STOPPED,
+        .desc      = "The Virtual Machine is permanently stopped",
+    },
+    {
         .error_fmt = QERR_VNC_SERVER_FAILED,
         .desc      = "Could not start VNC server on %(target)",
     },
diff --git a/qerror.h b/qerror.h
index 8058456..c759172 100644
--- a/qerror.h
+++ b/qerror.h
@@ -181,6 +181,9 @@  QError *qobject_to_qerror(const QObject *obj);
 #define QERR_UNKNOWN_BLOCK_FORMAT_FEATURE \
     "{ 'class': 'UnknownBlockFormatFeature', 'data': { 'device': %s, 'format': %s, 'feature': %s } }"
 
+#define QERR_VM_PERMANENT_STOPPED \
+    "{ 'class': 'PermanentStopped', 'data': {} }"
+
 #define QERR_VNC_SERVER_FAILED \
     "{ 'class': 'VNCServerFailed', 'data': { 'target': %s } }"
 
diff --git a/sysemu.h b/sysemu.h
index d3013f5..af90fa3 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -12,6 +12,7 @@ 
 extern const char *bios_name;
 
 extern int vm_running;
+extern int vm_permanent_stopped;
 extern const char *qemu_name;
 extern uint8_t qemu_uuid[];
 int qemu_uuid_parse(const char *str, uint8_t *uuid);
@@ -39,6 +40,7 @@  void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
 
 void vm_start(void);
 void vm_stop(int reason);
+void vm_stop_permanent(int reason);
 
 void qemu_system_reset_request(void);
 void qemu_system_shutdown_request(void);
diff --git a/vl.c b/vl.c
index 4b6688b..96403ac 100644
--- a/vl.c
+++ b/vl.c
@@ -186,6 +186,7 @@  NICInfo nd_table[MAX_NICS];
 int vm_running;
 int autostart;
 int incoming_expected; /* Started with -incoming and waiting for incoming */
+int vm_permanent_stopped = 0; /* qemu will never resume if set */
 static int rtc_utc = 1;
 static int rtc_date_offset = -1; /* -1 means no change */
 QEMUClock *rtc_clock;
@@ -1397,7 +1398,7 @@  static void main_loop(void)
             qemu_kill_report();
             monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
             if (no_shutdown) {
-                vm_stop(VMSTOP_SHUTDOWN);
+                vm_stop_permanent(VMSTOP_SHUTDOWN);
             } else
                 break;
         }
@@ -3315,6 +3316,7 @@  int main(int argc, char **argv, char **envp)
     qemu_system_reset(VMRESET_SILENT);
     if (loadvm) {
         if (load_vmstate(loadvm) < 0) {
+            vm_permanent_stopped = 1;
             autostart = 0;
         }
     }