Patchwork [v12,rebased] kvm: notify host when the guest is panicked

login
register
mail settings
Submitter Hu Tao
Date Jan. 23, 2013, 7:19 a.m.
Message ID <1358925575-4505-1-git-send-email-hutao@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/214827/
State New
Headers show

Comments

Hu Tao - Jan. 23, 2013, 7:19 a.m.
We can know that the guest has panicked when the guest runs on Xen.
But we do not have such a feature on KVM.

Another purpose of this feature is: a management app (for example,
libvirt) can do an auto dump when the guest has panicked. If the management
app does not do an auto dump, the guest's user can do a dump by hand if
he sees that the guest has panicked.

We have three solutions to implement this feature:
1. use vmcall
2. use I/O port
3. use virtio-serial.

We have decided to avoid touching the hypervisor. The reasons why I
chose the I/O port are:
1. it is easier to implement
2. it does not depend on any virtual device
3. it can work when starting the kernel

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 arch/ia64/kvm/irq.h                  | 19 +++++++++++++
 arch/powerpc/include/asm/kvm_para.h  | 18 ++++++++++++
 arch/s390/include/asm/kvm_para.h     | 19 +++++++++++++
 arch/x86/include/asm/kvm_para.h      | 20 ++++++++++++++
 arch/x86/include/uapi/asm/kvm_para.h |  2 ++
 arch/x86/kernel/kvm.c                | 53 ++++++++++++++++++++++++++++++++++++
 include/linux/kvm_para.h             | 18 ++++++++++++
 include/uapi/linux/kvm_para.h        |  6 ++++
 kernel/panic.c                       |  4 +++
 9 files changed, 159 insertions(+)
Marcelo Tosatti - Feb. 8, 2013, 1:39 a.m.
Hi,

On Wed, Jan 23, 2013 at 03:19:21PM +0800, Hu Tao wrote:
> We can know the guest is panicked when the guest runs on xen.
> But we do not have such feature on kvm.
> 
> Another purpose of this feature is: management app(for example:
> libvirt) can do auto dump when the guest is panicked. If management
> app does not do auto dump, the guest's user can do dump by hand if
> he sees the guest is panicked.
> 
> We have three solutions to implement this feature:
> 1. use vmcall
> 2. use I/O port
> 3. use virtio-serial.
> 
> We have decided to avoid touching hypervisor. The reason why I choose
> choose the I/O port is:
> 1. it is easier to implememt
> 2. it does not depend any virtual device
> 3. it can work when starting the kernel
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  arch/ia64/kvm/irq.h                  | 19 +++++++++++++
>  arch/powerpc/include/asm/kvm_para.h  | 18 ++++++++++++
>  arch/s390/include/asm/kvm_para.h     | 19 +++++++++++++
>  arch/x86/include/asm/kvm_para.h      | 20 ++++++++++++++
>  arch/x86/include/uapi/asm/kvm_para.h |  2 ++
>  arch/x86/kernel/kvm.c                | 53 ++++++++++++++++++++++++++++++++++++
>  include/linux/kvm_para.h             | 18 ++++++++++++
>  include/uapi/linux/kvm_para.h        |  6 ++++
>  kernel/panic.c                       |  4 +++
>  9 files changed, 159 insertions(+)
> 
> diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
> index c0785a7..b3870f8 100644
> --- a/arch/ia64/kvm/irq.h
> +++ b/arch/ia64/kvm/irq.h
> @@ -30,4 +30,23 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  	return 1;
>  }
>  
> +static inline int kvm_arch_pv_event_init(void)
> +{
> +	return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> +	return 0;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> +}
> +
> +static inline bool kvm_arch_pv_event_enabled(void)
> +{
> +	return false;
> +}
> +

The interface is x86 only, no need to touch other architectures.

>  #endif
> diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
> index 2b11965..17dd013 100644
> --- a/arch/powerpc/include/asm/kvm_para.h
> +++ b/arch/powerpc/include/asm/kvm_para.h
> @@ -144,4 +144,22 @@ static inline bool kvm_check_and_clear_guest_paused(void)
>  	return false;
>  }
>  
> +static inline int kvm_arch_pv_event_init(void)
> +{
> +	return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> +	return 0;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> +}
> +
> +static inline bool kvm_arch_pv_event_enabled(void)
> +{
> +	return false;
> +}
>  #endif /* __POWERPC_KVM_PARA_H__ */
> diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
> index e0f8423..81d87ec 100644
> --- a/arch/s390/include/asm/kvm_para.h
> +++ b/arch/s390/include/asm/kvm_para.h
> @@ -154,4 +154,23 @@ static inline bool kvm_check_and_clear_guest_paused(void)
>  	return false;
>  }
>  
> +static inline int kvm_arch_pv_event_init(void)
> +{
> +	return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> +	return 0;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> +}
> +
> +static inline bool kvm_arch_pv_event_enabled(void)
> +{
> +	return false;
> +}
> +
>  #endif /* __S390_KVM_PARA_H */
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -133,4 +133,24 @@ static inline void kvm_disable_steal_time(void)
>  }
>  #endif
>  
> +static inline int kvm_arch_pv_event_init(void)
> +{
> +	if (!request_region(KVM_PV_EVENT_PORT, 4, "KVM_PV_EVENT"))
> +		return -1;
> +
> +	return 0;
> +}

This should be in a driver in arch/x86/kernel/kvm-panic.c, or so.

> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> +	return inl(KVM_PV_EVENT_PORT);
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> +	outl(event, KVM_PV_EVENT_PORT);
> +}

> +
> +bool kvm_arch_pv_event_enabled(void);
> +
>  #endif /* _ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> index 06fdbd9..c15ef33 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -96,5 +96,7 @@ struct kvm_vcpu_pv_apf_data {
>  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
>  #define KVM_PV_EOI_DISABLED 0x0
>  
> +#define KVM_PV_EVENT_PORT	(0x505UL)
> +

No need for the ioport to be hard coded. What are the options to
communicate an address to the guest? An MSR, via ACPI?

>  
>  #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 9c2bd8b..0aa7b3e 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -73,6 +73,20 @@ static int parse_no_kvmclock_vsyscall(char *arg)
>  
>  early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
>  
> +static int pv_event = 1;
> +static int parse_no_pv_event(char *arg)
> +{
> +	pv_event = 0;
> +	return 0;
> +}
> +
> +bool kvm_arch_pv_event_enabled(void)
> +{
> +	return !!pv_event;
> +}
> +
> +early_param("no-pv-event", parse_no_pv_event);
> +

"pv-event" is a bad name for an interface which is specific to notify
panic events. Please use pv-panic everywhere.

>  static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
>  static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
>  static int has_steal_clock = 0;
> @@ -385,6 +399,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>  	.notifier_call = kvm_pv_reboot_notify,
>  };
>  
> +static int
> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> +{
> +	kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
> +	return NOTIFY_DONE;
> +}

Why 'eject' ?

> +
> +static struct notifier_block kvm_pv_panic_nb = {
> +	.notifier_call = kvm_pv_panic_notify,
> +};
> +
>  static u64 kvm_steal_clock(int cpu)
>  {
>  	u64 steal;
> @@ -462,6 +487,34 @@ static void __init kvm_apf_trap_init(void)
>  	set_intr_gate(14, &async_page_fault);
>  }
>  
> +static void __init kvm_pv_panicked_event_init(void)
> +{
> +	if (!kvm_para_available())
> +		return;
> +
> +	if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
> +		atomic_notifier_chain_register(&panic_notifier_list,
> +			&kvm_pv_panic_nb);
> +}
> +
> +static inline int kvm_pv_event_init(void)
> +{
> +	return kvm_arch_pv_event_init();
> +}
> +
> +static int __init enable_pv_event(void)
> +{
> +	if (pv_event) {
> +		if (kvm_pv_event_init())
> +			return 0;
> +
> +		kvm_pv_panicked_event_init();
> +	}
> +
> +	return 0;
> +}
> +arch_initcall(enable_pv_event);

Call the initialization code from kvm_guest_init, only one function is
necessary.

> +
>  void __init kvm_guest_init(void)
>  {
>  	int i;
> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
> index 00a97bb..6fb6198 100644
> --- a/include/linux/kvm_para.h
> +++ b/include/linux/kvm_para.h
> @@ -10,4 +10,22 @@ static inline int kvm_para_has_feature(unsigned int feature)
>  		return 1;
>  	return 0;
>  }
> +
> +static inline int kvm_pv_has_feature(unsigned int feature)
> +{
> +	if (kvm_arch_pv_features() & (1UL << feature))
> +		return 1;
> +	return 0;
> +}
> +
> +static inline void kvm_pv_eject_event(unsigned int event)
> +{
> +	kvm_arch_pv_eject_event(event);
> +}
> +
> +static inline bool kvm_pv_event_enabled(void)
> +{
> +	return kvm_arch_pv_event_enabled();
> +}

No need for these helpers, as noted.

>  #endif /* __LINUX_KVM_PARA_H */
> diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
> index cea2c5c..c41ddce 100644
> --- a/include/uapi/linux/kvm_para.h
> +++ b/include/uapi/linux/kvm_para.h
> @@ -20,6 +20,12 @@
>  #define KVM_HC_FEATURES			3
>  #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
>  
> +/* The bit of supported pv event */
> +#define KVM_PV_FEATURE_PANICKED	0
> +
> +/* The pv event value */
> +#define KVM_PV_EVENT_PANICKED	1
> +

This is a hypercall header. You want

arch/x86/include/asm/kvm_para.h

>  /*
>   * hypercalls use architecture specific
>   */
> diff --git a/kernel/panic.c b/kernel/panic.c
> index e1b2822..a764d2e 100644
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -23,6 +23,7 @@
>  #include <linux/init.h>
>  #include <linux/nmi.h>
>  #include <linux/dmi.h>
> +#include <linux/kvm_para.h>
>  
>  #define PANIC_TIMER_STEP 100
>  #define PANIC_BLINK_SPD 18
> @@ -132,6 +133,9 @@ void panic(const char *fmt, ...)
>  	if (!panic_blink)
>  		panic_blink = no_blink;
>  
> +	if (kvm_pv_event_enabled())
> +		panic_timeout = 0;
> +

What is the rationale behind this?
Hu Tao - Feb. 28, 2013, 8:54 a.m.
On Thu, Feb 07, 2013 at 11:39:47PM -0200, Marcelo Tosatti wrote:
> Hi,
> 
> On Wed, Jan 23, 2013 at 03:19:21PM +0800, Hu Tao wrote:
> > We can know the guest is panicked when the guest runs on xen.
> > But we do not have such feature on kvm.
> > 
> > Another purpose of this feature is: management app(for example:
> > libvirt) can do auto dump when the guest is panicked. If management
> > app does not do auto dump, the guest's user can do dump by hand if
> > he sees the guest is panicked.
> > 
> > We have three solutions to implement this feature:
> > 1. use vmcall
> > 2. use I/O port
> > 3. use virtio-serial.
> > 
> > We have decided to avoid touching hypervisor. The reason why I choose
> > choose the I/O port is:
> > 1. it is easier to implememt
> > 2. it does not depend any virtual device
> > 3. it can work when starting the kernel
> > 
> > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > ---
> >  arch/ia64/kvm/irq.h                  | 19 +++++++++++++
> >  arch/powerpc/include/asm/kvm_para.h  | 18 ++++++++++++
> >  arch/s390/include/asm/kvm_para.h     | 19 +++++++++++++
> >  arch/x86/include/asm/kvm_para.h      | 20 ++++++++++++++
> >  arch/x86/include/uapi/asm/kvm_para.h |  2 ++
> >  arch/x86/kernel/kvm.c                | 53 ++++++++++++++++++++++++++++++++++++
> >  include/linux/kvm_para.h             | 18 ++++++++++++
> >  include/uapi/linux/kvm_para.h        |  6 ++++
> >  kernel/panic.c                       |  4 +++
> >  9 files changed, 159 insertions(+)
> > 
> > diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
> > index c0785a7..b3870f8 100644
> > --- a/arch/ia64/kvm/irq.h
> > +++ b/arch/ia64/kvm/irq.h
> > @@ -30,4 +30,23 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
> >  	return 1;
> >  }
> >  
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > +}
> > +
> > +static inline bool kvm_arch_pv_event_enabled(void)
> > +{
> > +	return false;
> > +}
> > +
> 
> The interface is x86 only, no need to touch other architectures.

OK.

> 
> >  #endif
> > diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
> > index 2b11965..17dd013 100644
> > --- a/arch/powerpc/include/asm/kvm_para.h
> > +++ b/arch/powerpc/include/asm/kvm_para.h
> > @@ -144,4 +144,22 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> >  	return false;
> >  }
> >  
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > +}
> > +
> > +static inline bool kvm_arch_pv_event_enabled(void)
> > +{
> > +	return false;
> > +}
> >  #endif /* __POWERPC_KVM_PARA_H__ */
> > diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
> > index e0f8423..81d87ec 100644
> > --- a/arch/s390/include/asm/kvm_para.h
> > +++ b/arch/s390/include/asm/kvm_para.h
> > @@ -154,4 +154,23 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> >  	return false;
> >  }
> >  
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > +}
> > +
> > +static inline bool kvm_arch_pv_event_enabled(void)
> > +{
> > +	return false;
> > +}
> > +
> >  #endif /* __S390_KVM_PARA_H */
> > --- a/arch/x86/include/asm/kvm_para.h
> > +++ b/arch/x86/include/asm/kvm_para.h
> > @@ -133,4 +133,24 @@ static inline void kvm_disable_steal_time(void)
> >  }
> >  #endif
> >  
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > +	if (!request_region(KVM_PV_EVENT_PORT, 4, "KVM_PV_EVENT"))
> > +		return -1;
> > +
> > +	return 0;
> > +}
> 
> This should be in a driver in arch/x86/kernel/kvm-panic.c, or so.
> 
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > +	return inl(KVM_PV_EVENT_PORT);
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > +	outl(event, KVM_PV_EVENT_PORT);
> > +}
> 
> > +
> > +bool kvm_arch_pv_event_enabled(void);
> > +
> >  #endif /* _ASM_X86_KVM_PARA_H */
> > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > index 06fdbd9..c15ef33 100644
> > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > @@ -96,5 +96,7 @@ struct kvm_vcpu_pv_apf_data {
> >  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> >  #define KVM_PV_EOI_DISABLED 0x0
> >  
> > +#define KVM_PV_EVENT_PORT	(0x505UL)
> > +
> 
> No need for the ioport to be hard coded. What are the options to
> communicate an address to the guest? An MSR, via ACPI?

I don't quite understand this. By 'address', do you mean an ioport?
How would an address be communicated? (I have little knowledge of ACPI)

> 
> >  
> >  #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 9c2bd8b..0aa7b3e 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -73,6 +73,20 @@ static int parse_no_kvmclock_vsyscall(char *arg)
> >  
> >  early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
> >  
> > +static int pv_event = 1;
> > +static int parse_no_pv_event(char *arg)
> > +{
> > +	pv_event = 0;
> > +	return 0;
> > +}
> > +
> > +bool kvm_arch_pv_event_enabled(void)
> > +{
> > +	return !!pv_event;
> > +}
> > +
> > +early_param("no-pv-event", parse_no_pv_event);
> > +
> 
> "pv-event" is a bad name for an interface which is specific to notify
> panic events. Please use pv-panic everywhere.

panic event is one of the events supported. Can we keep the name?

> 
> >  static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
> >  static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
> >  static int has_steal_clock = 0;
> > @@ -385,6 +399,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
> >  	.notifier_call = kvm_pv_reboot_notify,
> >  };
> >  
> > +static int
> > +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> > +{
> > +	kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
> > +	return NOTIFY_DONE;
> > +}
> 
> Why 'eject' ?

changed to 'send'

> 
> > +
> > +static struct notifier_block kvm_pv_panic_nb = {
> > +	.notifier_call = kvm_pv_panic_notify,
> > +};
> > +
> >  static u64 kvm_steal_clock(int cpu)
> >  {
> >  	u64 steal;
> > @@ -462,6 +487,34 @@ static void __init kvm_apf_trap_init(void)
> >  	set_intr_gate(14, &async_page_fault);
> >  }
> >  
> > +static void __init kvm_pv_panicked_event_init(void)
> > +{
> > +	if (!kvm_para_available())
> > +		return;
> > +
> > +	if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
> > +		atomic_notifier_chain_register(&panic_notifier_list,
> > +			&kvm_pv_panic_nb);
> > +}
> > +
> > +static inline int kvm_pv_event_init(void)
> > +{
> > +	return kvm_arch_pv_event_init();
> > +}
> > +
> > +static int __init enable_pv_event(void)
> > +{
> > +	if (pv_event) {
> > +		if (kvm_pv_event_init())
> > +			return 0;
> > +
> > +		kvm_pv_panicked_event_init();
> > +	}
> > +
> > +	return 0;
> > +}
> > +arch_initcall(enable_pv_event);
> 
> Call the initialization code from kvm_guest_init, only one function is
> necessary.

At the point of kvm_guest_init, request_region (called by
kvm_pv_event_init) will block, so the guest kernel won't come up.

> 
> > +
> >  void __init kvm_guest_init(void)
> >  {
> >  	int i;
> > diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
> > index 00a97bb..6fb6198 100644
> > --- a/include/linux/kvm_para.h
> > +++ b/include/linux/kvm_para.h
> > @@ -10,4 +10,22 @@ static inline int kvm_para_has_feature(unsigned int feature)
> >  		return 1;
> >  	return 0;
> >  }
> > +
> > +static inline int kvm_pv_has_feature(unsigned int feature)
> > +{
> > +	if (kvm_arch_pv_features() & (1UL << feature))
> > +		return 1;
> > +	return 0;
> > +}
> > +
> > +static inline void kvm_pv_eject_event(unsigned int event)
> > +{
> > +	kvm_arch_pv_eject_event(event);
> > +}
> > +
> > +static inline bool kvm_pv_event_enabled(void)
> > +{
> > +	return kvm_arch_pv_event_enabled();
> > +}
> 
> No need for this helpers, as noted.

OK.

> 
> >  #endif /* __LINUX_KVM_PARA_H */
> > diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
> > index cea2c5c..c41ddce 100644
> > --- a/include/uapi/linux/kvm_para.h
> > +++ b/include/uapi/linux/kvm_para.h
> > @@ -20,6 +20,12 @@
> >  #define KVM_HC_FEATURES			3
> >  #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
> >  
> > +/* The bit of supported pv event */
> > +#define KVM_PV_FEATURE_PANICKED	0
> > +
> > +/* The pv event value */
> > +#define KVM_PV_EVENT_PANICKED	1
> > +
> 
> This is a hypercall header. You want
> 
> arch/x86/include/asm/kvm_para.h

OK.

> 
> >  /*
> >   * hypercalls use architecture specific
> >   */
> > diff --git a/kernel/panic.c b/kernel/panic.c
> > index e1b2822..a764d2e 100644
> > --- a/kernel/panic.c
> > +++ b/kernel/panic.c
> > @@ -23,6 +23,7 @@
> >  #include <linux/init.h>
> >  #include <linux/nmi.h>
> >  #include <linux/dmi.h>
> > +#include <linux/kvm_para.h>
> >  
> >  #define PANIC_TIMER_STEP 100
> >  #define PANIC_BLINK_SPD 18
> > @@ -132,6 +133,9 @@ void panic(const char *fmt, ...)
> >  	if (!panic_blink)
> >  		panic_blink = no_blink;
> >  
> > +	if (kvm_pv_event_enabled())
> > +		panic_timeout = 0;
> > +
> 
> What is the rationale behind this?

This is a hack to disable reset_on_panic if user enables
pv-event.
Marcelo Tosatti - March 2, 2013, 12:03 a.m.
On Thu, Feb 28, 2013 at 04:54:25PM +0800, Hu Tao wrote:
> > > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > > index 06fdbd9..c15ef33 100644
> > > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > > @@ -96,5 +96,7 @@ struct kvm_vcpu_pv_apf_data {
> > >  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> > >  #define KVM_PV_EOI_DISABLED 0x0
> > >  
> > > +#define KVM_PV_EVENT_PORT	(0x505UL)
> > > +
> > 
> > No need for the ioport to be hard coded. What are the options to
> > communicate an address to the guest? An MSR, via ACPI?
> 
> I'm not quite understanding here. By 'address', you mean an ioport?
> how to communicate an address? (I have little knowledge about ACPI)

Yes, the ioport. The address of the ioport should not be fixed (for
example, a future emulated board could use that fixed ioport address,
0x505UL).

One option is to pass the address via an MSR. Yes, that is probably the
best option because there is no dependency on ACPI.

> > "pv-event" is a bad name for an interface which is specific to notify
> > panic events. Please use pv-panic everywhere.
> 
> panic event is one of the events supported. Can we keep the name?

> > Call the initialization code from kvm_guest_init, only one function is
> > necessary.
> 
> At the point of kvm_guest_init, rqeust_region (called by
> kvm_pv_event_init) will block, so the guest kernel won't up.

Why does it block?

> > >  #define PANIC_TIMER_STEP 100
> > >  #define PANIC_BLINK_SPD 18
> > > @@ -132,6 +133,9 @@ void panic(const char *fmt, ...)
> > >  	if (!panic_blink)
> > >  		panic_blink = no_blink;
> > >  
> > > +	if (kvm_pv_event_enabled())
> > > +		panic_timeout = 0;
> > > +
> > 
> > What is the rationale behind this?
> 
> This is a hack to disable reset_on_panic if user enables
> pv-event.

Condition it to kvm_pv_event_enabled() directly?
Gleb Natapov - March 3, 2013, 1 p.m.
On Fri, Mar 01, 2013 at 09:03:12PM -0300, Marcelo Tosatti wrote:
> On Thu, Feb 28, 2013 at 04:54:25PM +0800, Hu Tao wrote:
> > > > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > > > index 06fdbd9..c15ef33 100644
> > > > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > > > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > > > @@ -96,5 +96,7 @@ struct kvm_vcpu_pv_apf_data {
> > > >  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> > > >  #define KVM_PV_EOI_DISABLED 0x0
> > > >  
> > > > +#define KVM_PV_EVENT_PORT	(0x505UL)
> > > > +
> > > 
> > > No need for the ioport to be hard coded. What are the options to
> > > communicate an address to the guest? An MSR, via ACPI?
> > 
> > I'm not quite understanding here. By 'address', you mean an ioport?
> > how to communicate an address? (I have little knowledge about ACPI)
> 
> Yes, the ioport. The address of the ioport should not be fixed (for
> example future emulated board could use that fixed ioport address,
> 0x505UL).
> 
> One option is to pass the address via an MSR. Yes, that is probably the
> best option because there is no dependency on ACPI.
> 
Why is a dependency on ACPI problematic? ACPI is the standard way on x86
to enumerate platform devices. Passing it through an MSR makes this panic
device a CPU interface, which it is not. And since relying on #GP to detect
valid MSRs is not a good interface, we will have to guard it with a cpuid bit.

--
			Gleb.
Marcelo Tosatti - March 3, 2013, 10:29 p.m.
On Sun, Mar 03, 2013 at 03:00:22PM +0200, Gleb Natapov wrote:
> On Fri, Mar 01, 2013 at 09:03:12PM -0300, Marcelo Tosatti wrote:
> > On Thu, Feb 28, 2013 at 04:54:25PM +0800, Hu Tao wrote:
> > > > > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > > > > index 06fdbd9..c15ef33 100644
> > > > > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > > > > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > > > > @@ -96,5 +96,7 @@ struct kvm_vcpu_pv_apf_data {
> > > > >  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> > > > >  #define KVM_PV_EOI_DISABLED 0x0
> > > > >  
> > > > > +#define KVM_PV_EVENT_PORT	(0x505UL)
> > > > > +
> > > > 
> > > > No need for the ioport to be hard coded. What are the options to
> > > > communicate an address to the guest? An MSR, via ACPI?
> > > 
> > > I'm not quite understanding here. By 'address', you mean an ioport?
> > > how to communicate an address? (I have little knowledge about ACPI)
> > 
> > Yes, the ioport. The address of the ioport should not be fixed (for
> > example future emulated board could use that fixed ioport address,
> > 0x505UL).
> > 
> > One option is to pass the address via an MSR. Yes, that is probably the
> > best option because there is no dependency on ACPI.
> > 
> Why dependency on ACPI is problematic? ACPI is the standard way on x86
> to enumerate platform devices. Passing it through MSR makes this panic
> device CPU interface which it is not. And since relying on #GP to detect
> valid MSRs is not good interface we will have to guard it by cpuid bit.
> 
> --
> 			Gleb.

The KVM guest <-> KVM host interface is not dependent on ACPI, so far. Say,
it's possible to use a Linux guest without ACPI and have KVM paravirt
fully functional.
Gleb Natapov - March 4, 2013, 5:49 p.m.
On Sun, Mar 03, 2013 at 07:29:53PM -0300, Marcelo Tosatti wrote:
> On Sun, Mar 03, 2013 at 03:00:22PM +0200, Gleb Natapov wrote:
> > On Fri, Mar 01, 2013 at 09:03:12PM -0300, Marcelo Tosatti wrote:
> > > On Thu, Feb 28, 2013 at 04:54:25PM +0800, Hu Tao wrote:
> > > > > > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > > > > > index 06fdbd9..c15ef33 100644
> > > > > > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > > > > > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > > > > > @@ -96,5 +96,7 @@ struct kvm_vcpu_pv_apf_data {
> > > > > >  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> > > > > >  #define KVM_PV_EOI_DISABLED 0x0
> > > > > >  
> > > > > > +#define KVM_PV_EVENT_PORT	(0x505UL)
> > > > > > +
> > > > > 
> > > > > No need for the ioport to be hard coded. What are the options to
> > > > > communicate an address to the guest? An MSR, via ACPI?
> > > > 
> > > > I'm not quite understanding here. By 'address', you mean an ioport?
> > > > how to communicate an address? (I have little knowledge about ACPI)
> > > 
> > > Yes, the ioport. The address of the ioport should not be fixed (for
> > > example future emulated board could use that fixed ioport address,
> > > 0x505UL).
> > > 
> > > One option is to pass the address via an MSR. Yes, that is probably the
> > > best option because there is no dependency on ACPI.
> > > 
> > Why dependency on ACPI is problematic? ACPI is the standard way on x86
> > to enumerate platform devices. Passing it through MSR makes this panic
> > device CPU interface which it is not. And since relying on #GP to detect
> > valid MSRs is not good interface we will have to guard it by cpuid bit.
> > 
> > --
> > 			Gleb.
> 
> KVM guest <-> KVM host interface is not dependent on ACPI, so far. Say,
> its possible to use a Linux guest without ACPI and have KVM paravirt 
> fully functional.
This is not a KVM guest <-> KVM host interface, though. This is yet another
device. We could implement a real IPMI device that has crash reporting
capability, but decided to go with something simpler. Without ACPI the guest
will not be able to power itself down either, but this is not a reason
for us to introduce a non-ACPI interface for power down.

--
			Gleb.
Gleb Natapov - March 5, 2013, 7:05 a.m.
On Mon, Mar 04, 2013 at 05:43:48PM -0300, Marcelo Tosatti wrote:
> On Mon, Mar 04, 2013 at 07:49:13PM +0200, Gleb Natapov wrote:
> > On Sun, Mar 03, 2013 at 07:29:53PM -0300, Marcelo Tosatti wrote:
> > > On Sun, Mar 03, 2013 at 03:00:22PM +0200, Gleb Natapov wrote:
> > > > On Fri, Mar 01, 2013 at 09:03:12PM -0300, Marcelo Tosatti wrote:
> > > > > On Thu, Feb 28, 2013 at 04:54:25PM +0800, Hu Tao wrote:
> > > > > > > > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > > > > > > > index 06fdbd9..c15ef33 100644
> > > > > > > > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > > > > > > > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > > > > > > > @@ -96,5 +96,7 @@ struct kvm_vcpu_pv_apf_data {
> > > > > > > >  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> > > > > > > >  #define KVM_PV_EOI_DISABLED 0x0
> > > > > > > >  
> > > > > > > > +#define KVM_PV_EVENT_PORT	(0x505UL)
> > > > > > > > +
> > > > > > > 
> > > > > > > No need for the ioport to be hard coded. What are the options to
> > > > > > > communicate an address to the guest? An MSR, via ACPI?
> > > > > > 
> > > > > > I'm not quite understanding here. By 'address', you mean an ioport?
> > > > > > how to communicate an address? (I have little knowledge about ACPI)
> > > > > 
> > > > > Yes, the ioport. The address of the ioport should not be fixed (for
> > > > > example future emulated board could use that fixed ioport address,
> > > > > 0x505UL).
> > > > > 
> > > > > One option is to pass the address via an MSR. Yes, that is probably the
> > > > > best option because there is no dependency on ACPI.
> > > > > 
> > > > Why dependency on ACPI is problematic? ACPI is the standard way on x86
> > > > to enumerate platform devices. Passing it through MSR makes this panic
> > > > device CPU interface which it is not. And since relying on #GP to detect
> > > > valid MSRs is not good interface we will have to guard it by cpuid bit.
> > > > 
> > > > --
> > > > 			Gleb.
> > > 
> > > KVM guest <-> KVM host interface is not dependent on ACPI, so far. Say,
> > > its possible to use a Linux guest without ACPI and have KVM paravirt 
> > > fully functional.
> > This is not KVM guest <-> KVM host interface though. This is yet another
> > device. We could implement real impi device that have crash reporting
> > capability, but decided to go with something simpler. Without ACPI guest
> > will not be able to power down itself too, but this is not the reason
> > for us to introduce non-ACPI interface for power down.
> 
> Sure (its more of an aesthetic/organizational point, i guess).
> 
> Anyway, one problem with ACPI is whether its initialized early enough
> (which is the whole point of PIO the x86 specific interface).
ACPI is needed pretty early in the boot process.

--
			Gleb.

Patch

diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index c0785a7..b3870f8 100644
--- a/arch/ia64/kvm/irq.h
+++ b/arch/ia64/kvm/irq.h
@@ -30,4 +30,23 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 	return 1;
 }
 
+static inline int kvm_arch_pv_event_init(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+	return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
+static inline bool kvm_arch_pv_event_enabled(void)
+{
+	return false;
+}
+
 #endif
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 2b11965..17dd013 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -144,4 +144,22 @@  static inline bool kvm_check_and_clear_guest_paused(void)
 	return false;
 }
 
+static inline int kvm_arch_pv_event_init(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+	return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
+static inline bool kvm_arch_pv_event_enabled(void)
+{
+	return false;
+}
 #endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index e0f8423..81d87ec 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -154,4 +154,23 @@  static inline bool kvm_check_and_clear_guest_paused(void)
 	return false;
 }
 
+static inline int kvm_arch_pv_event_init(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+	return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
+static inline bool kvm_arch_pv_event_enabled(void)
+{
+	return false;
+}
+
 #endif /* __S390_KVM_PARA_H */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed1f161..c3f2ca8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -133,4 +133,24 @@  static inline void kvm_disable_steal_time(void)
 }
 #endif
 
+static inline int kvm_arch_pv_event_init(void)
+{
+	if (!request_region(KVM_PV_EVENT_PORT, 4, "KVM_PV_EVENT"))
+		return -1;
+
+	return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+	return inl(KVM_PV_EVENT_PORT);
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+	outl(event, KVM_PV_EVENT_PORT);
+}
+
+bool kvm_arch_pv_event_enabled(void);
+
 #endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 06fdbd9..c15ef33 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -96,5 +96,7 @@  struct kvm_vcpu_pv_apf_data {
 #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
 #define KVM_PV_EOI_DISABLED 0x0
 
+#define KVM_PV_EVENT_PORT	(0x505UL)
+
 
 #endif /* _UAPI_ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9c2bd8b..0aa7b3e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -73,6 +73,20 @@  static int parse_no_kvmclock_vsyscall(char *arg)
 
 early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
 
+static int pv_event = 1;
+static int parse_no_pv_event(char *arg)
+{
+	pv_event = 0;
+	return 0;
+}
+
+bool kvm_arch_pv_event_enabled(void)
+{
+	return !!pv_event;
+}
+
+early_param("no-pv-event", parse_no_pv_event);
+
 static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
 static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
 static int has_steal_clock = 0;
@@ -385,6 +399,17 @@  static struct notifier_block kvm_pv_reboot_nb = {
 	.notifier_call = kvm_pv_reboot_notify,
 };
 
+static int
+kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
+{
+	kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_panic_nb = {
+	.notifier_call = kvm_pv_panic_notify,
+};
+
 static u64 kvm_steal_clock(int cpu)
 {
 	u64 steal;
@@ -462,6 +487,34 @@  static void __init kvm_apf_trap_init(void)
 	set_intr_gate(14, &async_page_fault);
 }
 
+static void __init kvm_pv_panicked_event_init(void)
+{
+	if (!kvm_para_available())
+		return;
+
+	if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
+		atomic_notifier_chain_register(&panic_notifier_list,
+			&kvm_pv_panic_nb);
+}
+
+static inline int kvm_pv_event_init(void)
+{
+	return kvm_arch_pv_event_init();
+}
+
+static int __init enable_pv_event(void)
+{
+	if (pv_event) {
+		if (kvm_pv_event_init())
+			return 0;
+
+		kvm_pv_panicked_event_init();
+	}
+
+	return 0;
+}
+arch_initcall(enable_pv_event);
+
 void __init kvm_guest_init(void)
 {
 	int i;
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index 00a97bb..6fb6198 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -10,4 +10,22 @@  static inline int kvm_para_has_feature(unsigned int feature)
 		return 1;
 	return 0;
 }
+
+static inline int kvm_pv_has_feature(unsigned int feature)
+{
+	if (kvm_arch_pv_features() & (1UL << feature))
+		return 1;
+	return 0;
+}
+
+static inline void kvm_pv_eject_event(unsigned int event)
+{
+	kvm_arch_pv_eject_event(event);
+}
+
+static inline bool kvm_pv_event_enabled(void)
+{
+	return kvm_arch_pv_event_enabled();
+}
+
 #endif /* __LINUX_KVM_PARA_H */
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index cea2c5c..c41ddce 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -20,6 +20,12 @@ 
 #define KVM_HC_FEATURES			3
 #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
 
+/* Bit numbers of the supported pv events, tested by kvm_pv_has_feature() */
+#define KVM_PV_FEATURE_PANICKED	0
+
+/* Event values passed to kvm_pv_eject_event() */
+#define KVM_PV_EVENT_PANICKED	1
+
 /*
  * hypercalls use architecture specific
  */
diff --git a/kernel/panic.c b/kernel/panic.c
index e1b2822..a764d2e 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -23,6 +23,7 @@ 
 #include <linux/init.h>
 #include <linux/nmi.h>
 #include <linux/dmi.h>
+#include <linux/kvm_para.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -132,6 +133,9 @@  void panic(const char *fmt, ...)
 	if (!panic_blink)
 		panic_blink = no_blink;
 
+	if (kvm_pv_event_enabled())
+		panic_timeout = 0;
+
 	if (panic_timeout > 0) {
 		/*
 		 * Delay timeout seconds before rebooting the machine.