diff mbox

kvm: notify host when guest paniced

Message ID 4F4AF1FB.6000903@cn.fujitsu.com
State New
Headers show

Commit Message

Wen Congyang Feb. 27, 2012, 3:01 a.m. UTC
We can know the guest is paniced when the guest runs on xen.
But we do not have such feature on kvm. This patch implemnts
this feature, and the implementation is the same as xen:
register panic notifier, and call hypercall when the guest
is paniced.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 arch/x86/kernel/kvm.c    |   12 ++++++++++++
 arch/x86/kvm/svm.c       |    8 ++++++--
 arch/x86/kvm/vmx.c       |    8 ++++++--
 arch/x86/kvm/x86.c       |   13 +++++++++++--
 include/linux/kvm.h      |    1 +
 include/linux/kvm_para.h |    1 +
 6 files changed, 37 insertions(+), 6 deletions(-)

Comments

Jan Kiszka Feb. 27, 2012, 3:08 p.m. UTC | #1
On 2012-02-27 04:01, Wen Congyang wrote:
> We can know the guest is paniced when the guest runs on xen.
> But we do not have such feature on kvm. This patch implemnts
> this feature, and the implementation is the same as xen:
> register panic notifier, and call hypercall when the guest
> is paniced.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> ---
>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>  arch/x86/kvm/svm.c       |    8 ++++++--
>  arch/x86/kvm/vmx.c       |    8 ++++++--
>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>  include/linux/kvm.h      |    1 +
>  include/linux/kvm_para.h |    1 +
>  6 files changed, 37 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index f0c6fd6..b928d1d 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>  	.notifier_call = kvm_pv_reboot_notify,
>  };
>  
> +static int
> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> +{
> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block kvm_pv_panic_nb = {
> +	.notifier_call = kvm_pv_panic_notify,
> +};
> +

You should split up host and guest-side changes.

>  static u64 kvm_steal_clock(int cpu)
>  {
>  	u64 steal;
> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>  
>  	paravirt_ops_setup();
>  	register_reboot_notifier(&kvm_pv_reboot_nb);
> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>  		spin_lock_init(&async_pf_sleepers[i].lock);
>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 0b7690e..38b4705 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>  
>  static int vmmcall_interception(struct vcpu_svm *svm)
>  {
> +	int ret;
> +
>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>  	skip_emulated_instruction(&svm->vcpu);
> -	kvm_emulate_hypercall(&svm->vcpu);
> -	return 1;
> +	ret = kvm_emulate_hypercall(&svm->vcpu);
> +
> +	/* Ignore the error? */
> +	return ret == 0 ? 0 : 1;

Why can't kvm_emulate_hypercall return the right value?

>  }
>  
>  static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 66147ca..1b57ebb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu)
>  
>  static int handle_vmcall(struct kvm_vcpu *vcpu)
>  {
> +	int ret;
> +
>  	skip_emulated_instruction(vcpu);
> -	kvm_emulate_hypercall(vcpu);
> -	return 1;
> +	ret = kvm_emulate_hypercall(vcpu);
> +
> +	/* Ignore the error? */
> +	return ret == 0 ? 0 : 1;
>  }
>  
>  static int handle_invd(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c9d99e5..3fc2853 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  	u64 param, ingpa, outgpa, ret;
>  	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
>  	bool fast, longmode;
> -	int cs_db, cs_l;
> +	int cs_db, cs_l, r = 1;
>  
>  	/*
>  	 * hypercall generates UD from non zero cpl and real mode
> @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
>  		kvm_vcpu_on_spin(vcpu);
>  		break;
> +	case KVM_HC_GUEST_PANIC:
> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
> +		r = 0;
> +		break;

That's the wrong place. This is a KVM hypercall, not a HyperV one.

>  	default:
>  		res = HV_STATUS_INVALID_HYPERCALL_CODE;
>  		break;
> @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
>  	}
>  
> -	return 1;
> +	return r;
>  }
>  
>  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>  	case KVM_HC_VAPIC_POLL_IRQ:
>  		ret = 0;
>  		break;
> +	case KVM_HC_GUEST_PANIC:
> +		ret = 0;
> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
> +		r = 0;
> +		break;
>  	default:
>  		ret = -KVM_ENOSYS;
>  		break;
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index acbe429..8f0e31b 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>  #define KVM_EXIT_OSI              18
>  #define KVM_EXIT_PAPR_HCALL	  19
>  #define KVM_EXIT_S390_UCONTROL	  20
> +#define KVM_EXIT_GUEST_PANIC	  21
>  
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  #define KVM_INTERNAL_ERROR_EMULATION 1
> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
> index ff476dd..cf94023 100644
> --- a/include/linux/kvm_para.h
> +++ b/include/linux/kvm_para.h
> @@ -19,6 +19,7 @@
>  #define KVM_HC_MMU_OP			2
>  #define KVM_HC_FEATURES			3
>  #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
> +#define KVM_HC_GUEST_PANIC		5
>  
>  /*
>   * hypercalls use architecture specific

Jan
Wen Congyang Feb. 28, 2012, 5:26 a.m. UTC | #2
At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
> On 2012-02-27 04:01, Wen Congyang wrote:
>> We can know the guest is paniced when the guest runs on xen.
>> But we do not have such feature on kvm. This patch implemnts
>> this feature, and the implementation is the same as xen:
>> register panic notifier, and call hypercall when the guest
>> is paniced.
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>  include/linux/kvm.h      |    1 +
>>  include/linux/kvm_para.h |    1 +
>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index f0c6fd6..b928d1d 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>  	.notifier_call = kvm_pv_reboot_notify,
>>  };
>>  
>> +static int
>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>> +{
>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>> +	return NOTIFY_DONE;
>> +}
>> +
>> +static struct notifier_block kvm_pv_panic_nb = {
>> +	.notifier_call = kvm_pv_panic_notify,
>> +};
>> +
> 
> You should split up host and guest-side changes.

OK

> 
>>  static u64 kvm_steal_clock(int cpu)
>>  {
>>  	u64 steal;
>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>  
>>  	paravirt_ops_setup();
>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 0b7690e..38b4705 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>  
>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>  {
>> +	int ret;
>> +
>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>  	skip_emulated_instruction(&svm->vcpu);
>> -	kvm_emulate_hypercall(&svm->vcpu);
>> -	return 1;
>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>> +
>> +	/* Ignore the error? */
>> +	return ret == 0 ? 0 : 1;
> 
> Why can't kvm_emulate_hypercall return the right value?

Because before this patch, kvm always ignores the error.

After rereading the code, kvm_emulate_hypercall() will return -KVM_EPERM
when vcpu's CPL is not 0. I think we should deal with this exception
in kvm_emulate_hypercall(), and return 1. But I donot know
how to do it. kvm_queue_exception(vcpu, UD_VECTOR)?

> 
>>  }
>>  
>>  static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 66147ca..1b57ebb 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu)
>>  
>>  static int handle_vmcall(struct kvm_vcpu *vcpu)
>>  {
>> +	int ret;
>> +
>>  	skip_emulated_instruction(vcpu);
>> -	kvm_emulate_hypercall(vcpu);
>> -	return 1;
>> +	ret = kvm_emulate_hypercall(vcpu);
>> +
>> +	/* Ignore the error? */
>> +	return ret == 0 ? 0 : 1;
>>  }
>>  
>>  static int handle_invd(struct kvm_vcpu *vcpu)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index c9d99e5..3fc2853 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>  	u64 param, ingpa, outgpa, ret;
>>  	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
>>  	bool fast, longmode;
>> -	int cs_db, cs_l;
>> +	int cs_db, cs_l, r = 1;
>>  
>>  	/*
>>  	 * hypercall generates UD from non zero cpl and real mode
>> @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>  	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
>>  		kvm_vcpu_on_spin(vcpu);
>>  		break;
>> +	case KVM_HC_GUEST_PANIC:
>> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
>> +		r = 0;
>> +		break;
> 
> That's the wrong place. This is a KVM hypercall, not a HyperV one.

OK, I will remove it.

Thanks
Wen Congyang

> 
>>  	default:
>>  		res = HV_STATUS_INVALID_HYPERCALL_CODE;
>>  		break;
>> @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>  		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
>>  	}
>>  
>> -	return 1;
>> +	return r;
>>  }
>>  
>>  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>> @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>>  	case KVM_HC_VAPIC_POLL_IRQ:
>>  		ret = 0;
>>  		break;
>> +	case KVM_HC_GUEST_PANIC:
>> +		ret = 0;
>> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
>> +		r = 0;
>> +		break;
>>  	default:
>>  		ret = -KVM_ENOSYS;
>>  		break;
>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>> index acbe429..8f0e31b 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>>  #define KVM_EXIT_OSI              18
>>  #define KVM_EXIT_PAPR_HCALL	  19
>>  #define KVM_EXIT_S390_UCONTROL	  20
>> +#define KVM_EXIT_GUEST_PANIC	  21
>>  
>>  /* For KVM_EXIT_INTERNAL_ERROR */
>>  #define KVM_INTERNAL_ERROR_EMULATION 1
>> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
>> index ff476dd..cf94023 100644
>> --- a/include/linux/kvm_para.h
>> +++ b/include/linux/kvm_para.h
>> @@ -19,6 +19,7 @@
>>  #define KVM_HC_MMU_OP			2
>>  #define KVM_HC_FEATURES			3
>>  #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
>> +#define KVM_HC_GUEST_PANIC		5
>>  
>>  /*
>>   * hypercalls use architecture specific
> 
> Jan
>
Wen Congyang Feb. 28, 2012, 8:07 a.m. UTC | #3
At 02/28/2012 01:26 PM, Wen Congyang Wrote:
> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
>> On 2012-02-27 04:01, Wen Congyang wrote:
>>> We can know the guest is paniced when the guest runs on xen.
>>> But we do not have such feature on kvm. This patch implemnts
>>> this feature, and the implementation is the same as xen:
>>> register panic notifier, and call hypercall when the guest
>>> is paniced.
>>>
>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>> ---
>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>>  include/linux/kvm.h      |    1 +
>>>  include/linux/kvm_para.h |    1 +
>>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>> index f0c6fd6..b928d1d 100644
>>> --- a/arch/x86/kernel/kvm.c
>>> +++ b/arch/x86/kernel/kvm.c
>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>>  	.notifier_call = kvm_pv_reboot_notify,
>>>  };
>>>  
>>> +static int
>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>>> +{
>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>>> +	return NOTIFY_DONE;
>>> +}
>>> +
>>> +static struct notifier_block kvm_pv_panic_nb = {
>>> +	.notifier_call = kvm_pv_panic_notify,
>>> +};
>>> +
>>
>> You should split up host and guest-side changes.
> 
> OK
> 
>>
>>>  static u64 kvm_steal_clock(int cpu)
>>>  {
>>>  	u64 steal;
>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>>  
>>>  	paravirt_ops_setup();
>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index 0b7690e..38b4705 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>>  
>>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>>  {
>>> +	int ret;
>>> +
>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>>  	skip_emulated_instruction(&svm->vcpu);
>>> -	kvm_emulate_hypercall(&svm->vcpu);
>>> -	return 1;
>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>>> +
>>> +	/* Ignore the error? */
>>> +	return ret == 0 ? 0 : 1;
>>
>> Why can't kvm_emulate_hypercall return the right value?
> 
> Because before this patch, kvm always ignores the error.
> 
> After rereading the code, kvm_emulate_hypercall() will return -KVM_EPERM

Sorry for my miss. kvm_emulate_hypercall() does not return -KVM_EPERM.

> when vcpu's CPL is not 0. I think we should deal with this exception
> in kvm_emulate_hypercall(), and return 1. But I donot know
> how to do it. kvm_queue_exception(vcpu, UD_VECTOR)?
> 
>>
>>>  }
>>>  
>>>  static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 66147ca..1b57ebb 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu)
>>>  
>>>  static int handle_vmcall(struct kvm_vcpu *vcpu)
>>>  {
>>> +	int ret;
>>> +
>>>  	skip_emulated_instruction(vcpu);
>>> -	kvm_emulate_hypercall(vcpu);
>>> -	return 1;
>>> +	ret = kvm_emulate_hypercall(vcpu);
>>> +
>>> +	/* Ignore the error? */
>>> +	return ret == 0 ? 0 : 1;
>>>  }
>>>  
>>>  static int handle_invd(struct kvm_vcpu *vcpu)
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index c9d99e5..3fc2853 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>>  	u64 param, ingpa, outgpa, ret;
>>>  	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
>>>  	bool fast, longmode;
>>> -	int cs_db, cs_l;
>>> +	int cs_db, cs_l, r = 1;
>>>  
>>>  	/*
>>>  	 * hypercall generates UD from non zero cpl and real mode
>>> @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>>  	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
>>>  		kvm_vcpu_on_spin(vcpu);
>>>  		break;
>>> +	case KVM_HC_GUEST_PANIC:
>>> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
>>> +		r = 0;
>>> +		break;
>>
>> That's the wrong place. This is a KVM hypercall, not a HyperV one.
> 
> OK, I will remove it.
> 
> Thanks
> Wen Congyang
> 
>>
>>>  	default:
>>>  		res = HV_STATUS_INVALID_HYPERCALL_CODE;
>>>  		break;
>>> @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>>  		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
>>>  	}
>>>  
>>> -	return 1;
>>> +	return r;
>>>  }
>>>  
>>>  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>>> @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>>>  	case KVM_HC_VAPIC_POLL_IRQ:
>>>  		ret = 0;
>>>  		break;
>>> +	case KVM_HC_GUEST_PANIC:
>>> +		ret = 0;
>>> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
>>> +		r = 0;
>>> +		break;
>>>  	default:
>>>  		ret = -KVM_ENOSYS;
>>>  		break;
>>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>>> index acbe429..8f0e31b 100644
>>> --- a/include/linux/kvm.h
>>> +++ b/include/linux/kvm.h
>>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>>>  #define KVM_EXIT_OSI              18
>>>  #define KVM_EXIT_PAPR_HCALL	  19
>>>  #define KVM_EXIT_S390_UCONTROL	  20
>>> +#define KVM_EXIT_GUEST_PANIC	  21
>>>  
>>>  /* For KVM_EXIT_INTERNAL_ERROR */
>>>  #define KVM_INTERNAL_ERROR_EMULATION 1
>>> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
>>> index ff476dd..cf94023 100644
>>> --- a/include/linux/kvm_para.h
>>> +++ b/include/linux/kvm_para.h
>>> @@ -19,6 +19,7 @@
>>>  #define KVM_HC_MMU_OP			2
>>>  #define KVM_HC_FEATURES			3
>>>  #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
>>> +#define KVM_HC_GUEST_PANIC		5
>>>  
>>>  /*
>>>   * hypercalls use architecture specific
>>
>> Jan
>>
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
Wen Congyang Feb. 28, 2012, 8:23 a.m. UTC | #4
At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
> On 2012-02-27 04:01, Wen Congyang wrote:
>> We can know the guest is paniced when the guest runs on xen.
>> But we do not have such feature on kvm. This patch implemnts
>> this feature, and the implementation is the same as xen:
>> register panic notifier, and call hypercall when the guest
>> is paniced.
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>  include/linux/kvm.h      |    1 +
>>  include/linux/kvm_para.h |    1 +
>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index f0c6fd6..b928d1d 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>  	.notifier_call = kvm_pv_reboot_notify,
>>  };
>>  
>> +static int
>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>> +{
>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>> +	return NOTIFY_DONE;
>> +}
>> +
>> +static struct notifier_block kvm_pv_panic_nb = {
>> +	.notifier_call = kvm_pv_panic_notify,
>> +};
>> +
> 
> You should split up host and guest-side changes.
> 
>>  static u64 kvm_steal_clock(int cpu)
>>  {
>>  	u64 steal;
>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>  
>>  	paravirt_ops_setup();
>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 0b7690e..38b4705 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>  
>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>  {
>> +	int ret;
>> +
>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>  	skip_emulated_instruction(&svm->vcpu);
>> -	kvm_emulate_hypercall(&svm->vcpu);
>> -	return 1;
>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>> +
>> +	/* Ignore the error? */
>> +	return ret == 0 ? 0 : 1;
> 
> Why can't kvm_emulate_hypercall return the right value?

kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
If vcpu's CPL > 0, does kvm need to exit and tell it to
qemu?

Thanks
Wen Congyang

> 
>>  }
>>  
>>  static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 66147ca..1b57ebb 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu)
>>  
>>  static int handle_vmcall(struct kvm_vcpu *vcpu)
>>  {
>> +	int ret;
>> +
>>  	skip_emulated_instruction(vcpu);
>> -	kvm_emulate_hypercall(vcpu);
>> -	return 1;
>> +	ret = kvm_emulate_hypercall(vcpu);
>> +
>> +	/* Ignore the error? */
>> +	return ret == 0 ? 0 : 1;
>>  }
>>  
>>  static int handle_invd(struct kvm_vcpu *vcpu)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index c9d99e5..3fc2853 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>  	u64 param, ingpa, outgpa, ret;
>>  	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
>>  	bool fast, longmode;
>> -	int cs_db, cs_l;
>> +	int cs_db, cs_l, r = 1;
>>  
>>  	/*
>>  	 * hypercall generates UD from non zero cpl and real mode
>> @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>  	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
>>  		kvm_vcpu_on_spin(vcpu);
>>  		break;
>> +	case KVM_HC_GUEST_PANIC:
>> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
>> +		r = 0;
>> +		break;
> 
> That's the wrong place. This is a KVM hypercall, not a HyperV one.
> 
>>  	default:
>>  		res = HV_STATUS_INVALID_HYPERCALL_CODE;
>>  		break;
>> @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>  		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
>>  	}
>>  
>> -	return 1;
>> +	return r;
>>  }
>>  
>>  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>> @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>>  	case KVM_HC_VAPIC_POLL_IRQ:
>>  		ret = 0;
>>  		break;
>> +	case KVM_HC_GUEST_PANIC:
>> +		ret = 0;
>> +		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
>> +		r = 0;
>> +		break;
>>  	default:
>>  		ret = -KVM_ENOSYS;
>>  		break;
>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>> index acbe429..8f0e31b 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>>  #define KVM_EXIT_OSI              18
>>  #define KVM_EXIT_PAPR_HCALL	  19
>>  #define KVM_EXIT_S390_UCONTROL	  20
>> +#define KVM_EXIT_GUEST_PANIC	  21
>>  
>>  /* For KVM_EXIT_INTERNAL_ERROR */
>>  #define KVM_INTERNAL_ERROR_EMULATION 1
>> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
>> index ff476dd..cf94023 100644
>> --- a/include/linux/kvm_para.h
>> +++ b/include/linux/kvm_para.h
>> @@ -19,6 +19,7 @@
>>  #define KVM_HC_MMU_OP			2
>>  #define KVM_HC_FEATURES			3
>>  #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
>> +#define KVM_HC_GUEST_PANIC		5
>>  
>>  /*
>>   * hypercalls use architecture specific
> 
> Jan
>
Jan Kiszka Feb. 28, 2012, 9:34 a.m. UTC | #5
On 2012-02-28 09:23, Wen Congyang wrote:
> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
>> On 2012-02-27 04:01, Wen Congyang wrote:
>>> We can know the guest is paniced when the guest runs on xen.
>>> But we do not have such feature on kvm. This patch implemnts
>>> this feature, and the implementation is the same as xen:
>>> register panic notifier, and call hypercall when the guest
>>> is paniced.
>>>
>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>> ---
>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>>  include/linux/kvm.h      |    1 +
>>>  include/linux/kvm_para.h |    1 +
>>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>> index f0c6fd6..b928d1d 100644
>>> --- a/arch/x86/kernel/kvm.c
>>> +++ b/arch/x86/kernel/kvm.c
>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>>  	.notifier_call = kvm_pv_reboot_notify,
>>>  };
>>>  
>>> +static int
>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>>> +{
>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>>> +	return NOTIFY_DONE;
>>> +}
>>> +
>>> +static struct notifier_block kvm_pv_panic_nb = {
>>> +	.notifier_call = kvm_pv_panic_notify,
>>> +};
>>> +
>>
>> You should split up host and guest-side changes.
>>
>>>  static u64 kvm_steal_clock(int cpu)
>>>  {
>>>  	u64 steal;
>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>>  
>>>  	paravirt_ops_setup();
>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index 0b7690e..38b4705 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>>  
>>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>>  {
>>> +	int ret;
>>> +
>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>>  	skip_emulated_instruction(&svm->vcpu);
>>> -	kvm_emulate_hypercall(&svm->vcpu);
>>> -	return 1;
>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>>> +
>>> +	/* Ignore the error? */
>>> +	return ret == 0 ? 0 : 1;
>>
>> Why can't kvm_emulate_hypercall return the right value?
> 
> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
> If vcpu's CPL > 0, does kvm need to exit and tell it to
> qemu?

No, there is currently no exit to userspace due to hypercalls, neither
of HV nor KVM kind.

The point is that the return code of kvm_emulate_hypercall is unused so
far, so you can easily redefine it to encode continue vs. exit to
userspace. Once someone has different needs, this could still be
refactored again.

Jan
Wen Congyang Feb. 28, 2012, 9:42 a.m. UTC | #6
At 02/28/2012 05:34 PM, Jan Kiszka Wrote:
> On 2012-02-28 09:23, Wen Congyang wrote:
>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
>>> On 2012-02-27 04:01, Wen Congyang wrote:
>>>> We can know the guest is paniced when the guest runs on xen.
>>>> But we do not have such feature on kvm. This patch implemnts
>>>> this feature, and the implementation is the same as xen:
>>>> register panic notifier, and call hypercall when the guest
>>>> is paniced.
>>>>
>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>> ---
>>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>>>  include/linux/kvm.h      |    1 +
>>>>  include/linux/kvm_para.h |    1 +
>>>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>>> index f0c6fd6..b928d1d 100644
>>>> --- a/arch/x86/kernel/kvm.c
>>>> +++ b/arch/x86/kernel/kvm.c
>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>>>  	.notifier_call = kvm_pv_reboot_notify,
>>>>  };
>>>>  
>>>> +static int
>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>>>> +{
>>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>>>> +	return NOTIFY_DONE;
>>>> +}
>>>> +
>>>> +static struct notifier_block kvm_pv_panic_nb = {
>>>> +	.notifier_call = kvm_pv_panic_notify,
>>>> +};
>>>> +
>>>
>>> You should split up host and guest-side changes.
>>>
>>>>  static u64 kvm_steal_clock(int cpu)
>>>>  {
>>>>  	u64 steal;
>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>>>  
>>>>  	paravirt_ops_setup();
>>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>> index 0b7690e..38b4705 100644
>>>> --- a/arch/x86/kvm/svm.c
>>>> +++ b/arch/x86/kvm/svm.c
>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>>>  
>>>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>>>  {
>>>> +	int ret;
>>>> +
>>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>>>  	skip_emulated_instruction(&svm->vcpu);
>>>> -	kvm_emulate_hypercall(&svm->vcpu);
>>>> -	return 1;
>>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>>>> +
>>>> +	/* Ignore the error? */
>>>> +	return ret == 0 ? 0 : 1;
>>>
>>> Why can't kvm_emulate_hypercall return the right value?
>>
>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
>> If vcpu's CPL > 0, does kvm need to exit and tell it to
>> qemu?
> 
> No, there is currently no exit to userspace due to hypercalls, neither
> of HV nor KVM kind.
> 
> The point is that the return code of kvm_emulate_hypercall is unused so
> far, so you can easily redefine it to encode continue vs. exit to
> userspace. Once someone has different needs, this could still be
> refactored again.

So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's
CPL > 0?

Thanks
Wen Congyang
> 
> Jan
>
Jan Kiszka Feb. 28, 2012, 10:19 a.m. UTC | #7
On 2012-02-28 10:42, Wen Congyang wrote:
> At 02/28/2012 05:34 PM, Jan Kiszka Wrote:
>> On 2012-02-28 09:23, Wen Congyang wrote:
>>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
>>>> On 2012-02-27 04:01, Wen Congyang wrote:
>>>>> We can know the guest is paniced when the guest runs on xen.
>>>>> But we do not have such feature on kvm. This patch implemnts
>>>>> this feature, and the implementation is the same as xen:
>>>>> register panic notifier, and call hypercall when the guest
>>>>> is paniced.
>>>>>
>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>>> ---
>>>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>>>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>>>>  include/linux/kvm.h      |    1 +
>>>>>  include/linux/kvm_para.h |    1 +
>>>>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>>>> index f0c6fd6..b928d1d 100644
>>>>> --- a/arch/x86/kernel/kvm.c
>>>>> +++ b/arch/x86/kernel/kvm.c
>>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>>>>  	.notifier_call = kvm_pv_reboot_notify,
>>>>>  };
>>>>>  
>>>>> +static int
>>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>>>>> +{
>>>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>>>>> +	return NOTIFY_DONE;
>>>>> +}
>>>>> +
>>>>> +static struct notifier_block kvm_pv_panic_nb = {
>>>>> +	.notifier_call = kvm_pv_panic_notify,
>>>>> +};
>>>>> +
>>>>
>>>> You should split up host and guest-side changes.
>>>>
>>>>>  static u64 kvm_steal_clock(int cpu)
>>>>>  {
>>>>>  	u64 steal;
>>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>>>>  
>>>>>  	paravirt_ops_setup();
>>>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>>>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>>> index 0b7690e..38b4705 100644
>>>>> --- a/arch/x86/kvm/svm.c
>>>>> +++ b/arch/x86/kvm/svm.c
>>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>>>>  
>>>>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>>>>  {
>>>>> +	int ret;
>>>>> +
>>>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>>>>  	skip_emulated_instruction(&svm->vcpu);
>>>>> -	kvm_emulate_hypercall(&svm->vcpu);
>>>>> -	return 1;
>>>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>>>>> +
>>>>> +	/* Ignore the error? */
>>>>> +	return ret == 0 ? 0 : 1;
>>>>
>>>> Why can't kvm_emulate_hypercall return the right value?
>>>
>>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
>>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
>>> If vcpu's CPL > 0, does kvm need to exit and tell it to
>>> qemu?
>>
>> No, there is currently no exit to userspace due to hypercalls, neither
>> of HV nor KVM kind.
>>
>> The point is that the return code of kvm_emulate_hypercall is unused so
>> far, so you can easily redefine it to encode continue vs. exit to
>> userspace. Once someone has different needs, this could still be
>> refactored again.
> 
> So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's
> CPL > 0?

Yes, change it to encode what vendor modules need to return to their
callers.

Jan
Gleb Natapov Feb. 28, 2012, 10:45 a.m. UTC | #8
On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote:
> On 2012-02-28 10:42, Wen Congyang wrote:
> > At 02/28/2012 05:34 PM, Jan Kiszka Wrote:
> >> On 2012-02-28 09:23, Wen Congyang wrote:
> >>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
> >>>> On 2012-02-27 04:01, Wen Congyang wrote:
> >>>>> We can know the guest is paniced when the guest runs on xen.
> >>>>> But we do not have such feature on kvm. This patch implemnts
> >>>>> this feature, and the implementation is the same as xen:
> >>>>> register panic notifier, and call hypercall when the guest
> >>>>> is paniced.
> >>>>>
> >>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> >>>>> ---
> >>>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
> >>>>>  arch/x86/kvm/svm.c       |    8 ++++++--
> >>>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
> >>>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
> >>>>>  include/linux/kvm.h      |    1 +
> >>>>>  include/linux/kvm_para.h |    1 +
> >>>>>  6 files changed, 37 insertions(+), 6 deletions(-)
> >>>>>
> >>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> >>>>> index f0c6fd6..b928d1d 100644
> >>>>> --- a/arch/x86/kernel/kvm.c
> >>>>> +++ b/arch/x86/kernel/kvm.c
> >>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
> >>>>>  	.notifier_call = kvm_pv_reboot_notify,
> >>>>>  };
> >>>>>  
> >>>>> +static int
> >>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> >>>>> +{
> >>>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
> >>>>> +	return NOTIFY_DONE;
> >>>>> +}
> >>>>> +
> >>>>> +static struct notifier_block kvm_pv_panic_nb = {
> >>>>> +	.notifier_call = kvm_pv_panic_notify,
> >>>>> +};
> >>>>> +
> >>>>
> >>>> You should split up host and guest-side changes.
> >>>>
> >>>>>  static u64 kvm_steal_clock(int cpu)
> >>>>>  {
> >>>>>  	u64 steal;
> >>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
> >>>>>  
> >>>>>  	paravirt_ops_setup();
> >>>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
> >>>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
> >>>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
> >>>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
> >>>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
> >>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >>>>> index 0b7690e..38b4705 100644
> >>>>> --- a/arch/x86/kvm/svm.c
> >>>>> +++ b/arch/x86/kvm/svm.c
> >>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
> >>>>>  
> >>>>>  static int vmmcall_interception(struct vcpu_svm *svm)
> >>>>>  {
> >>>>> +	int ret;
> >>>>> +
> >>>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
> >>>>>  	skip_emulated_instruction(&svm->vcpu);
> >>>>> -	kvm_emulate_hypercall(&svm->vcpu);
> >>>>> -	return 1;
> >>>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
> >>>>> +
> >>>>> +	/* Ignore the error? */
> >>>>> +	return ret == 0 ? 0 : 1;
> >>>>
> >>>> Why can't kvm_emulate_hypercall return the right value?
> >>>
> >>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
> >>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
> >>> If vcpu's CPL > 0, does kvm need to exit and tell it to
> >>> qemu?
> >>
> >> No, there is currently no exit to userspace due to hypercalls, neither
> >> of HV nor KVM kind.
> >>
> >> The point is that the return code of kvm_emulate_hypercall is unused so
> >> far, so you can easily redefine it to encode continue vs. exit to
> >> userspace. Once someone has different needs, this could still be
> >> refactored again.
> > 
> > So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's
> > CPL > 0?
> 
> Yes, change it to encode what vendor modules need to return to their
> callers.
> 
Better introduce new request flag and set it in your hypercall emulation. See
how triple fault is handled.

--
			Gleb.
Avi Kivity Feb. 28, 2012, 11:23 a.m. UTC | #9
On 02/27/2012 05:01 AM, Wen Congyang wrote:
> We can know the guest is paniced when the guest runs on xen.
> But we do not have such feature on kvm. This patch implemnts
> this feature, and the implementation is the same as xen:
> register panic notifier, and call hypercall when the guest
> is paniced.

What's the motivation for this?  "Xen does this" is insufficient.
Wen Congyang Feb. 29, 2012, 1:08 a.m. UTC | #10
At 02/28/2012 06:45 PM, Gleb Natapov Wrote:
> On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote:
>> On 2012-02-28 10:42, Wen Congyang wrote:
>>> At 02/28/2012 05:34 PM, Jan Kiszka Wrote:
>>>> On 2012-02-28 09:23, Wen Congyang wrote:
>>>>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
>>>>>> On 2012-02-27 04:01, Wen Congyang wrote:
>>>>>>> We can know the guest is paniced when the guest runs on xen.
>>>>>>> But we do not have such feature on kvm. This patch implemnts
>>>>>>> this feature, and the implementation is the same as xen:
>>>>>>> register panic notifier, and call hypercall when the guest
>>>>>>> is paniced.
>>>>>>>
>>>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>>>>> ---
>>>>>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>>>>>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>>>>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>>>>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>>>>>>  include/linux/kvm.h      |    1 +
>>>>>>>  include/linux/kvm_para.h |    1 +
>>>>>>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>>>>>> index f0c6fd6..b928d1d 100644
>>>>>>> --- a/arch/x86/kernel/kvm.c
>>>>>>> +++ b/arch/x86/kernel/kvm.c
>>>>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>>>>>>  	.notifier_call = kvm_pv_reboot_notify,
>>>>>>>  };
>>>>>>>  
>>>>>>> +static int
>>>>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>>>>>>> +{
>>>>>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>>>>>>> +	return NOTIFY_DONE;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static struct notifier_block kvm_pv_panic_nb = {
>>>>>>> +	.notifier_call = kvm_pv_panic_notify,
>>>>>>> +};
>>>>>>> +
>>>>>>
>>>>>> You should split up host and guest-side changes.
>>>>>>
>>>>>>>  static u64 kvm_steal_clock(int cpu)
>>>>>>>  {
>>>>>>>  	u64 steal;
>>>>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>>>>>>  
>>>>>>>  	paravirt_ops_setup();
>>>>>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>>>>>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>>>>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>>>>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>>>>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>>>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>>>>> index 0b7690e..38b4705 100644
>>>>>>> --- a/arch/x86/kvm/svm.c
>>>>>>> +++ b/arch/x86/kvm/svm.c
>>>>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>>>>>>  
>>>>>>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>>>>>>  {
>>>>>>> +	int ret;
>>>>>>> +
>>>>>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>>>>>>  	skip_emulated_instruction(&svm->vcpu);
>>>>>>> -	kvm_emulate_hypercall(&svm->vcpu);
>>>>>>> -	return 1;
>>>>>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>>>>>>> +
>>>>>>> +	/* Ignore the error? */
>>>>>>> +	return ret == 0 ? 0 : 1;
>>>>>>
>>>>>> Why can't kvm_emulate_hypercall return the right value?
>>>>>
>>>>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
>>>>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
>>>>> If vcpu's CPL > 0, does kvm need to exit and tell it to
>>>>> qemu?
>>>>
>>>> No, there is currently no exit to userspace due to hypercalls, neither
>>>> of HV nor KVM kind.
>>>>
>>>> The point is that the return code of kvm_emulate_hypercall is unused so
>>>> far, so you can easily redefine it to encode continue vs. exit to
>>>> userspace. Once someone has different needs, this could still be
>>>> refactored again.
>>>
>>> So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's
>>> CPL > 0?
>>
>> Yes, change it to encode what vendor modules need to return to their
>> callers.
>>
> Better introduce new request flag and set it in your hypercall emulation. See
> how triple fault is handled.

triple fault sets KVM_EXIT_SHUTDOWN and exits to userspace. Do you mean introduce
a new value(like KVM_EXIT_SHUTDOWN)?

Thanks
Wen Congyang

> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
Wen Congyang Feb. 29, 2012, 1:29 a.m. UTC | #11
At 02/28/2012 07:23 PM, Avi Kivity Wrote:
> On 02/27/2012 05:01 AM, Wen Congyang wrote:
>> We can know the guest is paniced when the guest runs on xen.
>> But we do not have such feature on kvm. This patch implemnts
>> this feature, and the implementation is the same as xen:
>> register panic notifier, and call hypercall when the guest
>> is paniced.
> 
> What's the motivation for this?  "Xen does this" is insufficient.

Another purpose is: management app(for example: libvirt) can do auto
dump when the guest is crashed. If management app does not do auto
dump, the guest's user can do dump by hand if he sees the guest is
paniced.

I am thinking about another status: dumping. This status tells
the guest's user that the guest is paniced, and the OS's dump function
is working.

These two status can tell the guest's user whether the guest is pancied,
and what should he do if the guest is paniced.

Thanks
Wen Congyang
Gleb Natapov Feb. 29, 2012, 9:36 a.m. UTC | #12
On Wed, Feb 29, 2012 at 09:08:52AM +0800, Wen Congyang wrote:
> At 02/28/2012 06:45 PM, Gleb Natapov Wrote:
> > On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote:
> >> On 2012-02-28 10:42, Wen Congyang wrote:
> >>> At 02/28/2012 05:34 PM, Jan Kiszka Wrote:
> >>>> On 2012-02-28 09:23, Wen Congyang wrote:
> >>>>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
> >>>>>> On 2012-02-27 04:01, Wen Congyang wrote:
> >>>>>>> We can know the guest is paniced when the guest runs on xen.
> >>>>>>> But we do not have such feature on kvm. This patch implemnts
> >>>>>>> this feature, and the implementation is the same as xen:
> >>>>>>> register panic notifier, and call hypercall when the guest
> >>>>>>> is paniced.
> >>>>>>>
> >>>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> >>>>>>> ---
> >>>>>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
> >>>>>>>  arch/x86/kvm/svm.c       |    8 ++++++--
> >>>>>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
> >>>>>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
> >>>>>>>  include/linux/kvm.h      |    1 +
> >>>>>>>  include/linux/kvm_para.h |    1 +
> >>>>>>>  6 files changed, 37 insertions(+), 6 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> >>>>>>> index f0c6fd6..b928d1d 100644
> >>>>>>> --- a/arch/x86/kernel/kvm.c
> >>>>>>> +++ b/arch/x86/kernel/kvm.c
> >>>>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
> >>>>>>>  	.notifier_call = kvm_pv_reboot_notify,
> >>>>>>>  };
> >>>>>>>  
> >>>>>>> +static int
> >>>>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> >>>>>>> +{
> >>>>>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
> >>>>>>> +	return NOTIFY_DONE;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static struct notifier_block kvm_pv_panic_nb = {
> >>>>>>> +	.notifier_call = kvm_pv_panic_notify,
> >>>>>>> +};
> >>>>>>> +
> >>>>>>
> >>>>>> You should split up host and guest-side changes.
> >>>>>>
> >>>>>>>  static u64 kvm_steal_clock(int cpu)
> >>>>>>>  {
> >>>>>>>  	u64 steal;
> >>>>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
> >>>>>>>  
> >>>>>>>  	paravirt_ops_setup();
> >>>>>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
> >>>>>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
> >>>>>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
> >>>>>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
> >>>>>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
> >>>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >>>>>>> index 0b7690e..38b4705 100644
> >>>>>>> --- a/arch/x86/kvm/svm.c
> >>>>>>> +++ b/arch/x86/kvm/svm.c
> >>>>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
> >>>>>>>  
> >>>>>>>  static int vmmcall_interception(struct vcpu_svm *svm)
> >>>>>>>  {
> >>>>>>> +	int ret;
> >>>>>>> +
> >>>>>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
> >>>>>>>  	skip_emulated_instruction(&svm->vcpu);
> >>>>>>> -	kvm_emulate_hypercall(&svm->vcpu);
> >>>>>>> -	return 1;
> >>>>>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
> >>>>>>> +
> >>>>>>> +	/* Ignore the error? */
> >>>>>>> +	return ret == 0 ? 0 : 1;
> >>>>>>
> >>>>>> Why can't kvm_emulate_hypercall return the right value?
> >>>>>
> >>>>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
> >>>>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
> >>>>> If vcpu's CPL > 0, does kvm need to exit and tell it to
> >>>>> qemu?
> >>>>
> >>>> No, there is currently no exit to userspace due to hypercalls, neither
> >>>> of HV nor KVM kind.
> >>>>
> >>>> The point is that the return code of kvm_emulate_hypercall is unused so
> >>>> far, so you can easily redefine it to encode continue vs. exit to
> >>>> userspace. Once someone has different needs, this could still be
> >>>> refactored again.
> >>>
> >>> So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's
> >>> CPL > 0?
> >>
> >> Yes, change it to encode what vendor modules need to return to their
> >> callers.
> >>
> > Better introduce new request flag and set it in your hypercall emulation. See
> > how triple fault is handled.
> 
> triple fault sets KVM_EXIT_SHUTDOWN and exits to userspace. Do you mean introduce
> a new value(like KVM_EXIT_SHUTDOWN)?
> 
I mean introduce new request bit (like KVM_REQ_TRIPLE_FAULT) and set it
in your hypercall if exit to userspace is needed instead of changing
return values.

--
			Gleb.
Avi Kivity Feb. 29, 2012, 9:49 a.m. UTC | #13
On 02/29/2012 03:29 AM, Wen Congyang wrote:
> At 02/28/2012 07:23 PM, Avi Kivity Wrote:
> > On 02/27/2012 05:01 AM, Wen Congyang wrote:
> >> We can know the guest is paniced when the guest runs on xen.
> >> But we do not have such feature on kvm. This patch implemnts
> >> this feature, and the implementation is the same as xen:
> >> register panic notifier, and call hypercall when the guest
> >> is paniced.
> > 
> > What's the motivation for this?  "Xen does this" is insufficient.
>
> Another purpose is: management app(for example: libvirt) can do auto
> dump when the guest is crashed. If management app does not do auto
> dump, the guest's user can do dump by hand if he sees the guest is
> paniced.
>
> I am thinking about another status: dumping. This status tells
> the guest's user that the guest is paniced, and the OS's dump function
> is working.
>
> These two status can tell the guest's user whether the guest is pancied,
> and what should he do if the guest is paniced.
>

How about using a virtio-serial channel for this?  You can transfer any
amount of information (including the dump itself).
Gleb Natapov Feb. 29, 2012, 9:55 a.m. UTC | #14
On Wed, Feb 29, 2012 at 11:49:58AM +0200, Avi Kivity wrote:
> On 02/29/2012 03:29 AM, Wen Congyang wrote:
> > At 02/28/2012 07:23 PM, Avi Kivity Wrote:
> > > On 02/27/2012 05:01 AM, Wen Congyang wrote:
> > >> We can know the guest is paniced when the guest runs on xen.
> > >> But we do not have such feature on kvm. This patch implemnts
> > >> this feature, and the implementation is the same as xen:
> > >> register panic notifier, and call hypercall when the guest
> > >> is paniced.
> > > 
> > > What's the motivation for this?  "Xen does this" is insufficient.
> >
> > Another purpose is: management app(for example: libvirt) can do auto
> > dump when the guest is crashed. If management app does not do auto
> > dump, the guest's user can do dump by hand if he sees the guest is
> > paniced.
> >
> > I am thinking about another status: dumping. This status tells
> > the guest's user that the guest is paniced, and the OS's dump function
> > is working.
> >
> > These two status can tell the guest's user whether the guest is pancied,
> > and what should he do if the guest is paniced.
> >
> 
> How about using a virtio-serial channel for this?  You can transfer any
> amount of information (including the dump itself).
> 
Isn't it unreliable after the guest panicked? Having special kdump
kernel that transfers dump to a host via virtio-serial channel though
sounds interesting. May be that's what you mean.

--
			Gleb.
Daniel P. Berrangé Feb. 29, 2012, 9:58 a.m. UTC | #15
On Wed, Feb 29, 2012 at 11:49:58AM +0200, Avi Kivity wrote:
> On 02/29/2012 03:29 AM, Wen Congyang wrote:
> > At 02/28/2012 07:23 PM, Avi Kivity Wrote:
> > > On 02/27/2012 05:01 AM, Wen Congyang wrote:
> > >> We can know the guest is paniced when the guest runs on xen.
> > >> But we do not have such feature on kvm. This patch implemnts
> > >> this feature, and the implementation is the same as xen:
> > >> register panic notifier, and call hypercall when the guest
> > >> is paniced.
> > > 
> > > What's the motivation for this?  "Xen does this" is insufficient.
> >
> > Another purpose is: management app(for example: libvirt) can do auto
> > dump when the guest is crashed. If management app does not do auto
> > dump, the guest's user can do dump by hand if he sees the guest is
> > paniced.
> >
> > I am thinking about another status: dumping. This status tells
> > the guest's user that the guest is paniced, and the OS's dump function
> > is working.
> >
> > These two status can tell the guest's user whether the guest is pancied,
> > and what should he do if the guest is paniced.
> >
> 
> How about using a virtio-serial channel for this?  You can transfer any
> amount of information (including the dump itself).

When the guest OS has crashed, any dumps will be done from the host
OS using libvirt's core dump mechanism. The guest OS isn't involved
and is likely too dead to be of any use anyway. Likewise it is
quite probably too dead to work a virtio-serial channel or any
similarly complex device. We're really just after the simplest
possible notification that the guest kernel has paniced.

Regards,
Daniel
Avi Kivity Feb. 29, 2012, 10 a.m. UTC | #16
On 02/29/2012 11:55 AM, Gleb Natapov wrote:
> > >
> > 
> > How about using a virtio-serial channel for this?  You can transfer any
> > amount of information (including the dump itself).
> > 
> Isn't it unreliable after the guest panicked? 

So is calling hypercalls, or dumping, or writing to the screen.  Of
course calling a hypercall is simpler and so is more reliable.

> Having special kdump
> kernel that transfers dump to a host via virtio-serial channel though
> sounds interesting. May be that's what you mean.

Yes.  The "panic, starting dump" signal should be initiated by the
panicking kernel though, in case the dump fails.
Avi Kivity Feb. 29, 2012, 10:05 a.m. UTC | #17
On 02/29/2012 11:58 AM, Daniel P. Berrange wrote:
> > 
> > How about using a virtio-serial channel for this?  You can transfer any
> > amount of information (including the dump itself).
>
> When the guest OS has crashed, any dumps will be done from the host
> OS using libvirt's core dump mechanism. The guest OS isn't involved
> and is likely too dead to be of any use anyway. Likewise it is
> quite probably too dead to work a virtio-serial channel or any
> similarly complex device. We're really just after the simplest
> possible notification that the guest kernel has paniced.

If it's alive enough to panic, it's alive enough to kexec its kdump
kernel.  After that it can do anything.

Guest-internal dumps are more useful IMO that host-initiated dumps.  In
a cloud, the host-initiated dump is left on the host, outside the reach
of the guest admin, outside the guest image where all the symbols are,
and sometimes not even on the same host if a live migration occurred. 
It's more useful in small setups, or if the problem is in the
hypervisor, not the guest.
Gleb Natapov Feb. 29, 2012, 10:05 a.m. UTC | #18
On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote:
> On 02/29/2012 11:55 AM, Gleb Natapov wrote:
> > > >
> > > 
> > > How about using a virtio-serial channel for this?  You can transfer any
> > > amount of information (including the dump itself).
> > > 
> > Isn't it unreliable after the guest panicked? 
> 
> So is calling hypercalls, or dumping, or writing to the screen.  Of
> course calling a hypercall is simpler and so is more reliable.
> 
Yes, crash can be so severe that it is not even detected by a kernel
itself, so not OOPS message even printed. But in most cases if kernel is
functional enough to print OOPS it is functional enough to call single
hypercall instruction.

> > Having special kdump
> > kernel that transfers dump to a host via virtio-serial channel though
> > sounds interesting. May be that's what you mean.
> 
> Yes.  The "panic, starting dump" signal should be initiated by the
> panicking kernel though, in case the dump fails.
> 
Then panic hypercall sounds like a reasonable solution.

--
			Gleb.
Wen Congyang Feb. 29, 2012, 10:06 a.m. UTC | #19
At 02/29/2012 05:36 PM, Gleb Natapov Wrote:
> On Wed, Feb 29, 2012 at 09:08:52AM +0800, Wen Congyang wrote:
>> At 02/28/2012 06:45 PM, Gleb Natapov Wrote:
>>> On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote:
>>>> On 2012-02-28 10:42, Wen Congyang wrote:
>>>>> At 02/28/2012 05:34 PM, Jan Kiszka Wrote:
>>>>>> On 2012-02-28 09:23, Wen Congyang wrote:
>>>>>>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote:
>>>>>>>> On 2012-02-27 04:01, Wen Congyang wrote:
>>>>>>>>> We can know the guest is paniced when the guest runs on xen.
>>>>>>>>> But we do not have such feature on kvm. This patch implemnts
>>>>>>>>> this feature, and the implementation is the same as xen:
>>>>>>>>> register panic notifier, and call hypercall when the guest
>>>>>>>>> is paniced.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>>>>>>> ---
>>>>>>>>>  arch/x86/kernel/kvm.c    |   12 ++++++++++++
>>>>>>>>>  arch/x86/kvm/svm.c       |    8 ++++++--
>>>>>>>>>  arch/x86/kvm/vmx.c       |    8 ++++++--
>>>>>>>>>  arch/x86/kvm/x86.c       |   13 +++++++++++--
>>>>>>>>>  include/linux/kvm.h      |    1 +
>>>>>>>>>  include/linux/kvm_para.h |    1 +
>>>>>>>>>  6 files changed, 37 insertions(+), 6 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>>>>>>>> index f0c6fd6..b928d1d 100644
>>>>>>>>> --- a/arch/x86/kernel/kvm.c
>>>>>>>>> +++ b/arch/x86/kernel/kvm.c
>>>>>>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>>>>>>>>>  	.notifier_call = kvm_pv_reboot_notify,
>>>>>>>>>  };
>>>>>>>>>  
>>>>>>>>> +static int
>>>>>>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>>>>>>>>> +{
>>>>>>>>> +	kvm_hypercall0(KVM_HC_GUEST_PANIC);
>>>>>>>>> +	return NOTIFY_DONE;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static struct notifier_block kvm_pv_panic_nb = {
>>>>>>>>> +	.notifier_call = kvm_pv_panic_notify,
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>
>>>>>>>> You should split up host and guest-side changes.
>>>>>>>>
>>>>>>>>>  static u64 kvm_steal_clock(int cpu)
>>>>>>>>>  {
>>>>>>>>>  	u64 steal;
>>>>>>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void)
>>>>>>>>>  
>>>>>>>>>  	paravirt_ops_setup();
>>>>>>>>>  	register_reboot_notifier(&kvm_pv_reboot_nb);
>>>>>>>>> +	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
>>>>>>>>>  	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
>>>>>>>>>  		spin_lock_init(&async_pf_sleepers[i].lock);
>>>>>>>>>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>>>>>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>>>>>>> index 0b7690e..38b4705 100644
>>>>>>>>> --- a/arch/x86/kvm/svm.c
>>>>>>>>> +++ b/arch/x86/kvm/svm.c
>>>>>>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm)
>>>>>>>>>  
>>>>>>>>>  static int vmmcall_interception(struct vcpu_svm *svm)
>>>>>>>>>  {
>>>>>>>>> +	int ret;
>>>>>>>>> +
>>>>>>>>>  	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
>>>>>>>>>  	skip_emulated_instruction(&svm->vcpu);
>>>>>>>>> -	kvm_emulate_hypercall(&svm->vcpu);
>>>>>>>>> -	return 1;
>>>>>>>>> +	ret = kvm_emulate_hypercall(&svm->vcpu);
>>>>>>>>> +
>>>>>>>>> +	/* Ignore the error? */
>>>>>>>>> +	return ret == 0 ? 0 : 1;
>>>>>>>>
>>>>>>>> Why can't kvm_emulate_hypercall return the right value?
>>>>>>>
>>>>>>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and
>>>>>>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0.
>>>>>>> If vcpu's CPL > 0, does kvm need to exit and tell it to
>>>>>>> qemu?
>>>>>>
>>>>>> No, there is currently no exit to userspace due to hypercalls, neither
>>>>>> of HV nor KVM kind.
>>>>>>
>>>>>> The point is that the return code of kvm_emulate_hypercall is unused so
>>>>>> far, so you can easily redefine it to encode continue vs. exit to
>>>>>> userspace. Once someone has different needs, this could still be
>>>>>> refactored again.
>>>>>
>>>>> So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's
>>>>> CPL > 0?
>>>>
>>>> Yes, change it to encode what vendor modules need to return to their
>>>> callers.
>>>>
>>> Better introduce new request flag and set it in your hypercall emulation. See
>>> how triple fault is handled.
>>
>> triple fault sets KVM_EXIT_SHUTDOWN and exits to userspace. Do you mean introduce
>> a new value(like KVM_EXIT_SHUTDOWN)?
>>
> I mean introduce new request bit (like KVM_REQ_TRIPLE_FAULT) and set it
> in your hypercall if exit to userspace is needed instead of changing
> return values.

I donot notice it. Thanks for you suggestion. I will modify my patch.

Thanks
Wen Congyang

> 
> --
> 			Gleb.
>
Avi Kivity Feb. 29, 2012, 10:08 a.m. UTC | #20
On 02/29/2012 12:05 PM, Gleb Natapov wrote:
> On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote:
> > On 02/29/2012 11:55 AM, Gleb Natapov wrote:
> > > > >
> > > > 
> > > > How about using a virtio-serial channel for this?  You can transfer any
> > > > amount of information (including the dump itself).
> > > > 
> > > Isn't it unreliable after the guest panicked? 
> > 
> > So is calling hypercalls, or dumping, or writing to the screen.  Of
> > course calling a hypercall is simpler and so is more reliable.
> > 
> Yes, crash can be so severe that it is not even detected by a kernel
> itself, so not OOPS message even printed. But in most cases if kernel is
> functional enough to print OOPS it is functional enough to call single
> hypercall instruction.

Why not print the oops to virtio-serial?  Or even just a regular serial
port?  That's what bare metal does.

> > > Having special kdump
> > > kernel that transfers dump to a host via virtio-serial channel though
> > > sounds interesting. May be that's what you mean.
> > 
> > Yes.  The "panic, starting dump" signal should be initiated by the
> > panicking kernel though, in case the dump fails.
> > 
> Then panic hypercall sounds like a reasonable solution.

It is, but I'm trying to see if we can get away with doing nothing.
Wen Congyang Feb. 29, 2012, 10:17 a.m. UTC | #21
At 02/29/2012 06:08 PM, Avi Kivity Wrote:
> On 02/29/2012 12:05 PM, Gleb Natapov wrote:
>> On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote:
>>> On 02/29/2012 11:55 AM, Gleb Natapov wrote:
>>>>>>
>>>>>
>>>>> How about using a virtio-serial channel for this?  You can transfer any
>>>>> amount of information (including the dump itself).
>>>>>
>>>> Isn't it unreliable after the guest panicked? 
>>>
>>> So is calling hypercalls, or dumping, or writing to the screen.  Of
>>> course calling a hypercall is simpler and so is more reliable.
>>>
>> Yes, crash can be so severe that it is not even detected by a kernel
>> itself, so not OOPS message even printed. But in most cases if kernel is
>> functional enough to print OOPS it is functional enough to call single
>> hypercall instruction.
> 
> Why not print the oops to virtio-serial?  Or even just a regular serial
> port?  That's what bare metal does.

If virtio-serial's driver has bug or the guest doesn't have such device...

> 
>>>> Having special kdump
>>>> kernel that transfers dump to a host via virtio-serial channel though
>>>> sounds interesting. May be that's what you mean.
>>>
>>> Yes.  The "panic, starting dump" signal should be initiated by the
>>> panicking kernel though, in case the dump fails.
>>>
>> Then panic hypercall sounds like a reasonable solution.
> 
> It is, but I'm trying to see if we can get away with doing nothing.
> 

If we have a reliable way with doing nothing, it is better. But I donot
find such way.

Thanks
Wen Congyang
Daniel P. Berrangé Feb. 29, 2012, 10:19 a.m. UTC | #22
On Wed, Feb 29, 2012 at 12:05:32PM +0200, Avi Kivity wrote:
> On 02/29/2012 11:58 AM, Daniel P. Berrange wrote:
> > > 
> > > How about using a virtio-serial channel for this?  You can transfer any
> > > amount of information (including the dump itself).
> >
> > When the guest OS has crashed, any dumps will be done from the host
> > OS using libvirt's core dump mechanism. The guest OS isn't involved
> > and is likely too dead to be of any use anyway. Likewise it is
> > quite probably too dead to work a virtio-serial channel or any
> > similarly complex device. We're really just after the simplest
> > possible notification that the guest kernel has paniced.
> 
> If it's alive enough to panic, it's alive enough to kexec its kdump
> kernel.  After that it can do anything.
> 
> Guest-internal dumps are more useful IMO that host-initiated dumps.  In
> a cloud, the host-initiated dump is left on the host, outside the reach
> of the guest admin, outside the guest image where all the symbols are,
> and sometimes not even on the same host if a live migration occurred. 
> It's more useful in small setups, or if the problem is in the
> hypervisor, not the guest.

I don't think guest vs host dumps should be considered mutually exclusive,
they both have pluses+minuses.

Configuring kexec+kdump requires non-negligable guest admin configuration
work before it's usable, and this work is guest OS specific, if it is possible
at all. A permanent panic notifier that's built in the kernel by default
requires zero guest admin config, and can allow host admin to automate
collection of dumps across all their hosts/guests. The KVM hypercall
notification is fairly trivially ported to any OS kernel, by comparison
with a full virtio + virtio-serial impl.

Regards,
Daniel
Wen Congyang Feb. 29, 2012, 10:31 a.m. UTC | #23
At 02/29/2012 06:05 PM, Avi Kivity Wrote:
> On 02/29/2012 11:58 AM, Daniel P. Berrange wrote:
>>>
>>> How about using a virtio-serial channel for this?  You can transfer any
>>> amount of information (including the dump itself).
>>
>> When the guest OS has crashed, any dumps will be done from the host
>> OS using libvirt's core dump mechanism. The guest OS isn't involved
>> and is likely too dead to be of any use anyway. Likewise it is
>> quite probably too dead to work a virtio-serial channel or any
>> similarly complex device. We're really just after the simplest
>> possible notification that the guest kernel has paniced.
> 
> If it's alive enough to panic, it's alive enough to kexec its kdump
> kernel.  After that it can do anything.
> 
> Guest-internal dumps are more useful IMO that host-initiated dumps.  In

Yes, guest-internal dump is better than host dump. But the user may not
start guest-internal dump or guest-internal dump failed. So we need the
following feature:
1. If the guest-internal dump does not work, the guest's status is 'crashed'.
   And then the user does the host dump.
2. If the guest-internal dump is working, the guest's status should be
   'dumping'. The user see this status and know the guest has paniced, and
    the guest-internal dump is working.

Thanks
Wen Congyang

> a cloud, the host-initiated dump is left on the host, outside the reach
> of the guest admin, outside the guest image where all the symbols are,
> and sometimes not even on the same host if a live migration occurred. 
> It's more useful in small setups, or if the problem is in the
> hypervisor, not the guest.
>
Avi Kivity Feb. 29, 2012, 10:39 a.m. UTC | #24
On 02/29/2012 12:17 PM, Wen Congyang wrote:
> >>>
> >> Yes, crash can be so severe that it is not even detected by a kernel
> >> itself, so not OOPS message even printed. But in most cases if kernel is
> >> functional enough to print OOPS it is functional enough to call single
> >> hypercall instruction.
> > 
> > Why not print the oops to virtio-serial?  Or even just a regular serial
> > port?  That's what bare metal does.
>
> If virtio-serial's driver has bug or the guest doesn't have such device...

We have the same issue with the hypercall; and virtio-serial is
available on many deployed versions.

> > 
> >>>> Having special kdump
> >>>> kernel that transfers dump to a host via virtio-serial channel though
> >>>> sounds interesting. May be that's what you mean.
> >>>
> >>> Yes.  The "panic, starting dump" signal should be initiated by the
> >>> panicking kernel though, in case the dump fails.
> >>>
> >> Then panic hypercall sounds like a reasonable solution.
> > 
> > It is, but I'm trying to see if we can get away with doing nothing.
> > 
>
> If we have a reliable way with doing nothing, it is better. But I donot
> find such way.

We won't have a 100% reliable way.  But I think a variant of the driver
that doesn't use interrupts, or just using the ordinary serial driver,
should be reliable enough.
Avi Kivity Feb. 29, 2012, 10:44 a.m. UTC | #25
On 02/29/2012 12:19 PM, Daniel P. Berrange wrote:
> On Wed, Feb 29, 2012 at 12:05:32PM +0200, Avi Kivity wrote:
> > On 02/29/2012 11:58 AM, Daniel P. Berrange wrote:
> > > > 
> > > > How about using a virtio-serial channel for this?  You can transfer any
> > > > amount of information (including the dump itself).
> > >
> > > When the guest OS has crashed, any dumps will be done from the host
> > > OS using libvirt's core dump mechanism. The guest OS isn't involved
> > > and is likely too dead to be of any use anyway. Likewise it is
> > > quite probably too dead to work a virtio-serial channel or any
> > > similarly complex device. We're really just after the simplest
> > > possible notification that the guest kernel has paniced.
> > 
> > If it's alive enough to panic, it's alive enough to kexec its kdump
> > kernel.  After that it can do anything.
> > 
> > Guest-internal dumps are more useful IMO that host-initiated dumps.  In
> > a cloud, the host-initiated dump is left on the host, outside the reach
> > of the guest admin, outside the guest image where all the symbols are,
> > and sometimes not even on the same host if a live migration occurred. 
> > It's more useful in small setups, or if the problem is in the
> > hypervisor, not the guest.
>
> I don't think guest vs host dumps should be considered mutually exclusive,
> they both have pluses+minuses.

True.

> Configuring kexec+kdump requires non-negligable guest admin configuration
> work before it's usable, and this work is guest OS specific, if it is possible
> at all. 

I think it's on by default on Windows and requires installing a package
on Linux, which may be part of the default configuration on many distros.

> A permanent panic notifier that's built in the kernel by default
> requires zero guest admin config, and can allow host admin to automate
> collection of dumps across all their hosts/guests. The KVM hypercall
> notification is fairly trivially ported to any OS kernel, by comparison
> with a full virtio + virtio-serial impl.

That's the path of least resistance.  But it's not necessarily the best
path.  We end up with a wide set of disconnected ABIs instead of a
narrow set that is more flexible.
Gleb Natapov Feb. 29, 2012, 10:44 a.m. UTC | #26
On Wed, Feb 29, 2012 at 12:08:19PM +0200, Avi Kivity wrote:
> On 02/29/2012 12:05 PM, Gleb Natapov wrote:
> > On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote:
> > > On 02/29/2012 11:55 AM, Gleb Natapov wrote:
> > > > > >
> > > > > 
> > > > > How about using a virtio-serial channel for this?  You can transfer any
> > > > > amount of information (including the dump itself).
> > > > > 
> > > > Isn't it unreliable after the guest panicked? 
> > > 
> > > So is calling hypercalls, or dumping, or writing to the screen.  Of
> > > course calling a hypercall is simpler and so is more reliable.
> > > 
> > Yes, crash can be so severe that it is not even detected by a kernel
> > itself, so not OOPS message even printed. But in most cases if kernel is
> > functional enough to print OOPS it is functional enough to call single
> > hypercall instruction.
> 
> Why not print the oops to virtio-serial?  Or even just a regular serial
> port?  That's what bare metal does.
> 
The more interface is complex the more chances it will fail during
panic. Regular serial is likely more reliable than virtio-serial.
Hypercall is likely more reliable than uart. On serial user can
fake panic notification. Can this be problematic?

> > > > Having special kdump
> > > > kernel that transfers dump to a host via virtio-serial channel though
> > > > sounds interesting. May be that's what you mean.
> > > 
> > > Yes.  The "panic, starting dump" signal should be initiated by the
> > > panicking kernel though, in case the dump fails.
> > > 
> > Then panic hypercall sounds like a reasonable solution.
> 
> It is, but I'm trying to see if we can get away with doing nothing.
> 
Fair enough.

--
			Gleb.
Avi Kivity Feb. 29, 2012, 10:46 a.m. UTC | #27
On 02/29/2012 12:31 PM, Wen Congyang wrote:
> At 02/29/2012 06:05 PM, Avi Kivity Wrote:
> > On 02/29/2012 11:58 AM, Daniel P. Berrange wrote:
> >>>
> >>> How about using a virtio-serial channel for this?  You can transfer any
> >>> amount of information (including the dump itself).
> >>
> >> When the guest OS has crashed, any dumps will be done from the host
> >> OS using libvirt's core dump mechanism. The guest OS isn't involved
> >> and is likely too dead to be of any use anyway. Likewise it is
> >> quite probably too dead to work a virtio-serial channel or any
> >> similarly complex device. We're really just after the simplest
> >> possible notification that the guest kernel has paniced.
> > 
> > If it's alive enough to panic, it's alive enough to kexec its kdump
> > kernel.  After that it can do anything.
> > 
> > Guest-internal dumps are more useful IMO that host-initiated dumps.  In
>
> Yes, guest-internal dump is better than host dump. But the user may not
> start guest-internal dump or guest-internal dump failed. So we need the
> following feature:
> 1. If the guest-internal dump does not work, the guest's status is 'crashed'.
>    And then the user does the host dump.
> 2. If the guest-internal dump is working, the guest's status should be
>    'dumping'. The user see this status and know the guest has paniced, and
>     the guest-internal dump is working.

I agree.  There is room for host dump, and we do want notification about
what the guest is doing.  The question is whether we should reuse
virtio-serial for guest-host communication in this case.  It's more
complicated, but allows us to avoid touching the hypervisor.
Avi Kivity Feb. 29, 2012, 10:48 a.m. UTC | #28
On 02/29/2012 12:44 PM, Gleb Natapov wrote:
> > > > 
> > > Yes, crash can be so severe that it is not even detected by a kernel
> > > itself, so not OOPS message even printed. But in most cases if kernel is
> > > functional enough to print OOPS it is functional enough to call single
> > > hypercall instruction.
> > 
> > Why not print the oops to virtio-serial?  Or even just a regular serial
> > port?  That's what bare metal does.
> > 
> The more interface is complex the more chances it will fail during
> panic. Regular serial is likely more reliable than virtio-serial.
> Hypercall is likely more reliable than uart. On serial user can
> fake panic notification. 

The serial device is under control of the kernel, so the user can only
access it if the kernel so allows.

> Can this be problematic?

From the point of view of the guest, yes, it can generate support calls
in fill up the admin's dump quota.  From the point of view of the host,
there's no difference between the guest's admin and a random user.
Gleb Natapov Feb. 29, 2012, 10:52 a.m. UTC | #29
On Wed, Feb 29, 2012 at 12:48:57PM +0200, Avi Kivity wrote:
> On 02/29/2012 12:44 PM, Gleb Natapov wrote:
> > > > > 
> > > > Yes, crash can be so severe that it is not even detected by a kernel
> > > > itself, so not OOPS message even printed. But in most cases if kernel is
> > > > functional enough to print OOPS it is functional enough to call single
> > > > hypercall instruction.
> > > 
> > > Why not print the oops to virtio-serial?  Or even just a regular serial
> > > port?  That's what bare metal does.
> > > 
> > The more interface is complex the more chances it will fail during
> > panic. Regular serial is likely more reliable than virtio-serial.
> > Hypercall is likely more reliable than uart. On serial user can
> > fake panic notification. 
> 
> The serial device is under control of the kernel, so the user can only
> access it if the kernel so allows.
> 
Yes, but if we will hijack UART for panic notification VM user will not
be able to use it for anything else. virtio-serial have many channels,
but AFAIK it does not work at early stages of boot process.

--
			Gleb.
Wen Congyang March 1, 2012, 3:34 a.m. UTC | #30
At 02/29/2012 06:39 PM, Avi Kivity Wrote:
> On 02/29/2012 12:17 PM, Wen Congyang wrote:
>>>>>
>>>> Yes, crash can be so severe that it is not even detected by a kernel
>>>> itself, so not OOPS message even printed. But in most cases if kernel is
>>>> functional enough to print OOPS it is functional enough to call single
>>>> hypercall instruction.
>>>
>>> Why not print the oops to virtio-serial?  Or even just a regular serial
>>> port?  That's what bare metal does.
>>
>> If virtio-serial's driver has bug or the guest doesn't have such device...
> 
> We have the same issue with the hypercall; and virtio-serial is
> available on many deployed versions.

How to know whether a guest has virtio-serial?

Thanks
Wen Congyang

> 
>>>
>>>>>> Having special kdump
>>>>>> kernel that transfers dump to a host via virtio-serial channel though
>>>>>> sounds interesting. May be that's what you mean.
>>>>>
>>>>> Yes.  The "panic, starting dump" signal should be initiated by the
>>>>> panicking kernel though, in case the dump fails.
>>>>>
>>>> Then panic hypercall sounds like a reasonable solution.
>>>
>>> It is, but I'm trying to see if we can get away with doing nothing.
>>>
>>
>> If we have a reliable way with doing nothing, it is better. But I donot
>> find such way.
> 
> We won't have a 100% reliable way.  But I think a variant of the driver
> that doesn't use interrupts, or just using the ordinary serial driver,
> should be reliable enough.
>
Wen Congyang March 1, 2012, 5:21 a.m. UTC | #31
At 02/29/2012 06:39 PM, Avi Kivity Wrote:
> On 02/29/2012 12:17 PM, Wen Congyang wrote:
>>>>>
>>>> Yes, crash can be so severe that it is not even detected by a kernel
>>>> itself, so not OOPS message even printed. But in most cases if kernel is
>>>> functional enough to print OOPS it is functional enough to call single
>>>> hypercall instruction.
>>>
>>> Why not print the oops to virtio-serial?  Or even just a regular serial
>>> port?  That's what bare metal does.
>>
>> If virtio-serial's driver has bug or the guest doesn't have such device...
> 
> We have the same issue with the hypercall; and virtio-serial is
> available on many deployed versions.

virtio-serial is available, but it is an optional device. If the guest does
not have this device, the guest cannot tell the host that is is paniced. So
I still prefer to touch the hypervisor.

Thanks
Wen Congyang

> 
>>>
>>>>>> Having special kdump
>>>>>> kernel that transfers dump to a host via virtio-serial channel though
>>>>>> sounds interesting. May be that's what you mean.
>>>>>
>>>>> Yes.  The "panic, starting dump" signal should be initiated by the
>>>>> panicking kernel though, in case the dump fails.
>>>>>
>>>> Then panic hypercall sounds like a reasonable solution.
>>>
>>> It is, but I'm trying to see if we can get away with doing nothing.
>>>
>>
>> If we have a reliable way with doing nothing, it is better. But I donot
>> find such way.
> 
> We won't have a 100% reliable way.  But I think a variant of the driver
> that doesn't use interrupts, or just using the ordinary serial driver,
> should be reliable enough.
>
diff mbox

Patch

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f0c6fd6..b928d1d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -331,6 +331,17 @@  static struct notifier_block kvm_pv_reboot_nb = {
 	.notifier_call = kvm_pv_reboot_notify,
 };
 
+static int
+kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
+{
+	kvm_hypercall0(KVM_HC_GUEST_PANIC);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_panic_nb = {
+	.notifier_call = kvm_pv_panic_notify,
+};
+
 static u64 kvm_steal_clock(int cpu)
 {
 	u64 steal;
@@ -417,6 +428,7 @@  void __init kvm_guest_init(void)
 
 	paravirt_ops_setup();
 	register_reboot_notifier(&kvm_pv_reboot_nb);
+	atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb);
 	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
 		spin_lock_init(&async_pf_sleepers[i].lock);
 	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0b7690e..38b4705 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1900,10 +1900,14 @@  static int halt_interception(struct vcpu_svm *svm)
 
 static int vmmcall_interception(struct vcpu_svm *svm)
 {
+	int ret;
+
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
-	kvm_emulate_hypercall(&svm->vcpu);
-	return 1;
+	ret = kvm_emulate_hypercall(&svm->vcpu);
+
+	/* Ignore the error? */
+	return ret == 0 ? 0 : 1;
 }
 
 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 66147ca..1b57ebb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4582,9 +4582,13 @@  static int handle_halt(struct kvm_vcpu *vcpu)
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	skip_emulated_instruction(vcpu);
-	kvm_emulate_hypercall(vcpu);
-	return 1;
+	ret = kvm_emulate_hypercall(vcpu);
+
+	/* Ignore the error? */
+	return ret == 0 ? 0 : 1;
 }
 
 static int handle_invd(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9d99e5..3fc2853 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4923,7 +4923,7 @@  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	u64 param, ingpa, outgpa, ret;
 	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
 	bool fast, longmode;
-	int cs_db, cs_l;
+	int cs_db, cs_l, r = 1;
 
 	/*
 	 * hypercall generates UD from non zero cpl and real mode
@@ -4964,6 +4964,10 @@  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
 		kvm_vcpu_on_spin(vcpu);
 		break;
+	case KVM_HC_GUEST_PANIC:
+		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
+		r = 0;
+		break;
 	default:
 		res = HV_STATUS_INVALID_HYPERCALL_CODE;
 		break;
@@ -4977,7 +4981,7 @@  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
 	}
 
-	return 1;
+	return r;
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5013,6 +5017,11 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HC_GUEST_PANIC:
+		ret = 0;
+		vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC;
+		r = 0;
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index acbe429..8f0e31b 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -163,6 +163,7 @@  struct kvm_pit_config {
 #define KVM_EXIT_OSI              18
 #define KVM_EXIT_PAPR_HCALL	  19
 #define KVM_EXIT_S390_UCONTROL	  20
+#define KVM_EXIT_GUEST_PANIC	  21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index ff476dd..cf94023 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -19,6 +19,7 @@ 
 #define KVM_HC_MMU_OP			2
 #define KVM_HC_FEATURES			3
 #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
+#define KVM_HC_GUEST_PANIC		5
 
 /*
  * hypercalls use architecture specific