Patchwork kernel/kvm: fix improper nmi emulation (was: Re: [PATCH] qemu: Fix inject-nmi)

login
register
mail settings
Submitter Lai Jiangshan
Date Oct. 10, 2011, 6:06 a.m.
Message ID <4E928B4E.2080207@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/118648/
State New
Headers show

Comments

Lai Jiangshan - Oct. 10, 2011, 6:06 a.m.
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>

Currently, an NMI is blindly sent to all the vCPUs when an NMI
button event happens. This doesn't properly emulate real hardware, on
which an NMI button event triggers LINT1. Because of this, the NMI is sent
to the processor even when LINT1 is masked in the LVT. For example, this
causes the problem that kdump initiated by NMI sometimes doesn't work
on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.

With this patch, the KVM_NMI ioctl is handled as follows:

- When the in-kernel irqchip is enabled, the KVM_NMI ioctl is handled as
  a request to trigger LINT1 on the processor. LINT1 is emulated in the
  in-kernel irqchip.

- When the in-kernel irqchip is disabled, the KVM_NMI ioctl is handled as
  a request to inject an NMI into the processor. This assumes LINT1 is
  already emulated in userland.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Tested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 arch/x86/kvm/irq.h   |    1 +
 arch/x86/kvm/lapic.c |    8 ++++++++
 arch/x86/kvm/x86.c   |   14 ++++----------
 3 files changed, 13 insertions(+), 10 deletions(-)
Jan Kiszka - Oct. 10, 2011, 6:40 a.m.
On 2011-10-10 08:06, Lai Jiangshan wrote:
> From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
> 
> Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
> button event happens. This doesn't properly emulate real hardware on
> which NMI button event triggers LINT1. Because of this, NMI is sent to
> the processor even when LINT1 is masked in LVT. For example, this
> causes the problem that kdump initiated by NMI sometimes doesn't work
> on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
> 
> With this patch, KVM_NMI ioctl is handled as follows.
> 
> - When in-kernel irqchip is enabled, KVM_NMI ioctl is handled as a
>   request of triggering LINT1 on the processor. LINT1 is emulated in
>   in-kernel irqchip.
> 
> - When in-kernel irqchip is disabled, KVM_NMI ioctl is handled as a
>   request of injecting NMI to the processor. This assumes LINT1 is
>   already emulated in userland.
> 
> Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
> Tested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
>  arch/x86/kvm/irq.h   |    1 +
>  arch/x86/kvm/lapic.c |    8 ++++++++
>  arch/x86/kvm/x86.c   |   14 ++++----------
>  3 files changed, 13 insertions(+), 10 deletions(-)
> 
> Index: linux/arch/x86/kvm/irq.h
> ===================================================================
> --- linux.orig/arch/x86/kvm/irq.h
> +++ linux/arch/x86/kvm/irq.h
> @@ -95,6 +95,7 @@ void kvm_pic_reset(struct kvm_kpic_state
>  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
>  void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
>  void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
> +void kvm_apic_lint1_deliver(struct kvm_vcpu *vcpu);
>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
>  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
>  void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
> Index: linux/arch/x86/kvm/lapic.c
> ===================================================================
> --- linux.orig/arch/x86/kvm/lapic.c
> +++ linux/arch/x86/kvm/lapic.c
> @@ -1039,6 +1039,14 @@ void kvm_apic_nmi_wd_deliver(struct kvm_
>  		kvm_apic_local_deliver(apic, APIC_LVT0);
>  }
>  
> +void kvm_apic_lint1_deliver(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +	if (apic)

WARN_ON(!apic)? Looks like that case would be a kernel bug.

> +		kvm_apic_local_deliver(apic, APIC_LVT1);
> +}
> +
>  static struct kvm_timer_ops lapic_timer_ops = {
>  	.is_periodic = lapic_is_periodic,
>  };
> Index: linux/arch/x86/kvm/x86.c
> ===================================================================
> --- linux.orig/arch/x86/kvm/x86.c
> +++ linux/arch/x86/kvm/x86.c
> @@ -2729,13 +2729,6 @@ static int kvm_vcpu_ioctl_interrupt(stru
>  	return 0;
>  }
>  
> -static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
> -{
> -	kvm_inject_nmi(vcpu);
> -
> -	return 0;
> -}
> -
>  static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
>  					   struct kvm_tpr_access_ctl *tac)
>  {
> @@ -3038,9 +3031,10 @@ long kvm_arch_vcpu_ioctl(struct file *fi
>  		break;
>  	}
>  	case KVM_NMI: {
> -		r = kvm_vcpu_ioctl_nmi(vcpu);
> -		if (r)
> -			goto out;
> +		if (irqchip_in_kernel(vcpu->kvm))
> +			kvm_apic_lint1_deliver(vcpu);
> +		else
> +			kvm_inject_nmi(vcpu);
>  		r = 0;
>  		break;
>  	}

Looks OK otherwise.

Jan
Avi Kivity - Oct. 10, 2011, 10:26 a.m.
On 10/10/2011 08:06 AM, Lai Jiangshan wrote:
> From: Kenji Kaneshige<kaneshige.kenji@jp.fujitsu.com>
>
> Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
> button event happens. This doesn't properly emulate real hardware on
> which NMI button event triggers LINT1. Because of this, NMI is sent to
> the processor even when LINT1 is masked in LVT. For example, this
> causes the problem that kdump initiated by NMI sometimes doesn't work
> on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
>
> With this patch, KVM_NMI ioctl is handled as follows.
>
> - When in-kernel irqchip is enabled, KVM_NMI ioctl is handled as a
>    request of triggering LINT1 on the processor. LINT1 is emulated in
>    in-kernel irqchip.
>
> - When in-kernel irqchip is disabled, KVM_NMI ioctl is handled as a
>    request of injecting NMI to the processor. This assumes LINT1 is
>    already emulated in userland.

Please add a KVM_NMI section to Documentation/virtual/kvm/api.txt.

>
> -static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
> -{
> -	kvm_inject_nmi(vcpu);
> -
> -	return 0;
> -}
> -
>   static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
>   					   struct kvm_tpr_access_ctl *tac)
>   {
> @@ -3038,9 +3031,10 @@ long kvm_arch_vcpu_ioctl(struct file *fi
>   		break;
>   	}
>   	case KVM_NMI: {
> -		r = kvm_vcpu_ioctl_nmi(vcpu);
> -		if (r)
> -			goto out;
> +		if (irqchip_in_kernel(vcpu->kvm))
> +			kvm_apic_lint1_deliver(vcpu);
> +		else
> +			kvm_inject_nmi(vcpu);
>   		r = 0;
>   		break;
>   	}

Why did you drop kvm_vcpu_ioctl_nmi()?

Please add (and document) a KVM_CAP flag that lets userspace know the 
new behaviour is supported.
Kenji Kaneshige - Oct. 12, 2011, 7:01 a.m.
(2011/10/10 19:26), Avi Kivity wrote:
> On 10/10/2011 08:06 AM, Lai Jiangshan wrote:
>> From: Kenji Kaneshige<kaneshige.kenji@jp.fujitsu.com>
>>
>> Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
>> button event happens. This doesn't properly emulate real hardware on
>> which NMI button event triggers LINT1. Because of this, NMI is sent to
>> the processor even when LINT1 is masked in LVT. For example, this
>> causes the problem that kdump initiated by NMI sometimes doesn't work
>> on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
>>
>> With this patch, KVM_NMI ioctl is handled as follows.
>>
>> - When in-kernel irqchip is enabled, KVM_NMI ioctl is handled as a
>> request of triggering LINT1 on the processor. LINT1 is emulated in
>> in-kernel irqchip.
>>
>> - When in-kernel irqchip is disabled, KVM_NMI ioctl is handled as a
>> request of injecting NMI to the processor. This assumes LINT1 is
>> already emulated in userland.
> 
> Please add a KVM_NMI section to Documentation/virtual/kvm/api.txt.
> 
>>
>> -static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
>> -{
>> - kvm_inject_nmi(vcpu);
>> -
>> - return 0;
>> -}
>> -
>> static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
>> struct kvm_tpr_access_ctl *tac)
>> {
>> @@ -3038,9 +3031,10 @@ long kvm_arch_vcpu_ioctl(struct file *fi
>> break;
>> }
>> case KVM_NMI: {
>> - r = kvm_vcpu_ioctl_nmi(vcpu);
>> - if (r)
>> - goto out;
>> + if (irqchip_in_kernel(vcpu->kvm))
>> + kvm_apic_lint1_deliver(vcpu);
>> + else
>> + kvm_inject_nmi(vcpu);
>> r = 0;
>> break;
>> }
> 
> Why did you drop kvm_vcpu_ioctl_nmi()?
> 
> Please add (and document) a KVM_CAP flag that lets userspace know the new behaviour is supported.
> 

Sorry for the delayed response.

I don't understand why a new KVM_CAP flag is needed.

I think the old behavior was clearly a bug, and the new behavior is not a new
capability. Furthermore, the kvm patch and the qemu patch in this patchset
can be applied independently. If only the kvm patch is applied, the NMI bug in
the in-kernel irqchip is fixed and the qemu NMI behavior is not changed. If only
the qemu patch is applied, the qemu NMI bug is fixed and the NMI behavior of the
in-kernel irqchip is not changed.

Regards,
Kenji Kaneshige

Patch

Index: linux/arch/x86/kvm/irq.h
===================================================================
--- linux.orig/arch/x86/kvm/irq.h
+++ linux/arch/x86/kvm/irq.h
@@ -95,6 +95,7 @@  void kvm_pic_reset(struct kvm_kpic_state
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
+void kvm_apic_lint1_deliver(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
Index: linux/arch/x86/kvm/lapic.c
===================================================================
--- linux.orig/arch/x86/kvm/lapic.c
+++ linux/arch/x86/kvm/lapic.c
@@ -1039,6 +1039,14 @@  void kvm_apic_nmi_wd_deliver(struct kvm_
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
+void kvm_apic_lint1_deliver(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVT1);
+}
+
 static struct kvm_timer_ops lapic_timer_ops = {
 	.is_periodic = lapic_is_periodic,
 };
Index: linux/arch/x86/kvm/x86.c
===================================================================
--- linux.orig/arch/x86/kvm/x86.c
+++ linux/arch/x86/kvm/x86.c
@@ -2729,13 +2729,6 @@  static int kvm_vcpu_ioctl_interrupt(stru
 	return 0;
 }
 
-static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
-{
-	kvm_inject_nmi(vcpu);
-
-	return 0;
-}
-
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 					   struct kvm_tpr_access_ctl *tac)
 {
@@ -3038,9 +3031,10 @@  long kvm_arch_vcpu_ioctl(struct file *fi
 		break;
 	}
 	case KVM_NMI: {
-		r = kvm_vcpu_ioctl_nmi(vcpu);
-		if (r)
-			goto out;
+		if (irqchip_in_kernel(vcpu->kvm))
+			kvm_apic_lint1_deliver(vcpu);
+		else
+			kvm_inject_nmi(vcpu);
 		r = 0;
 		break;
 	}