diff mbox

[2/2,V5] qemu-kvm: fix improper nmi emulation

Message ID 4E97FACE.7030509@cn.fujitsu.com
State New
Headers show

Commit Message

Lai Jiangshan Oct. 14, 2011, 9:03 a.m. UTC
Currently, an NMI is blindly sent to all the vCPUs when an NMI
button event happens. This doesn't properly emulate real hardware, on
which an NMI button event triggers LINT1. Because of this, the NMI is sent
to the processor even when LINT1 is masked in the LVT. For example, this
causes the problem that kdump initiated by NMI sometimes doesn't work
on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.

With this patch, inject-nmi request is handled as follows.

- When in-kernel irqchip is enabled and KVM_SET_LINT1 is enabled,
  inject LINT1 instead of NMI interrupt.

- Otherwise, when in-kernel irqchip is enabled, get the in-kernel
  LAPIC state and test APIC_LVT_MASKED; if LINT1 is unmasked,
  deliver the NMI directly.

- Otherwise, the userland LAPIC emulates the NMI button and injects
  the NMI if LINT1 is unmasked.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Reported-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
---
 hw/apic.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/apic.h |    1 +
 monitor.c |    6 ++++-
 3 files changed, 78 insertions(+), 1 deletions(-)

Comments

Jan Kiszka Oct. 14, 2011, 9:22 a.m. UTC | #1
On 2011-10-14 11:03, Lai Jiangshan wrote:
> Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
> button event happens. This doesn't properly emulate real hardware on
> which NMI button event triggers LINT1. Because of this, NMI is sent to
> the processor even when LINT1 is masked in LVT. For example, this
> causes the problem that kdump initiated by NMI sometimes doesn't work
> on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
> 
> With this patch, inject-nmi request is handled as follows.
> 
> - When in-kernel irqchip is enabled and KVM_SET_LINT1 is enabled,
>   inject LINT1 instead of NMI interrupt.
> 
> - otherwise when in-kernel irqchip is enabled, get the in-kernel
>   LAPIC states and test the APIC_LVT_MASKED, if LINT1 is unmasked,
>   and then delivering the NMI directly.
> 
> - otherwise, userland lapic emulates NMI button and inject NMI
>   if it is unmasked.
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> Reported-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
> ---
>  hw/apic.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/apic.h |    1 +
>  monitor.c |    6 ++++-
>  3 files changed, 78 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/apic.c b/hw/apic.c
> index 69d6ac5..91b82d0 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -205,6 +205,78 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
>      }
>  }
>  
> +#ifdef KVM_CAP_IRQCHIP

Please read all my comments. That unfortunately also applies to the rest
of the patch.

> +static inline uint32_t kapic_reg(struct kvm_lapic_state *kapic, int reg_id);
> +
> +static void kvm_irqchip_deliver_nmi(void *p)
> +{
> +    APICState *s = p;
> +    struct kvm_lapic_state klapic;
> +    uint32_t lvt;
> +
> +    kvm_get_lapic(s->cpu_env, &klapic);
> +    lvt = kapic_reg(&klapic, 0x32 + APIC_LVT_LINT1);
> +
> +    if (lvt & APIC_LVT_MASKED) {
> +        return;
> +    }
> +
> +    if (((lvt >> 8) & 7) != APIC_DM_NMI) {
> +        return;
> +    }
> +
> +    kvm_vcpu_ioctl(s->cpu_env, KVM_NMI);
> +}
> +
> +static void __apic_deliver_nmi(APICState *s)
> +{
> +    if (kvm_irqchip_in_kernel()) {
> +        run_on_cpu(s->cpu_env, kvm_irqchip_deliver_nmi, s);
> +    } else {
> +        apic_local_deliver(s, APIC_LVT_LINT1);
> +    }
> +}
> +#else
> +static void __apic_deliver_nmi(APICState *s)
> +{
> +    apic_local_deliver(s, APIC_LVT_LINT1);
> +}
> +#endif
> +
> +enum {
> +    KVM_SET_LINT1_UNKNOWN,
> +    KVM_SET_LINT1_ENABLED,
> +    KVM_SET_LINT1_DISABLED,
> +};
> +
> +static void kvm_set_lint1(void *p)
> +{
> +    CPUState *env = p;
> +
> +    kvm_vcpu_ioctl(env, KVM_SET_LINT1);
> +}
> +
> +void apic_deliver_nmi(DeviceState *d)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +    static int kernel_lint1 = KVM_SET_LINT1_UNKNOWN;
> +
> +    if (kernel_lint1 == KVM_SET_LINT1_UNKNOWN) {
> +        if (kvm_enabled() && kvm_irqchip_in_kernel() &&
> +            kvm_check_extension(kvm_state, KVM_CAP_SET_LINT1)) {

That CAP test belongs where the injection shall happen. Here you decide
about user space vs. kernel space APIC model.

Let's try it together:

if kvm_enabled && kvm_irqchip_in_kernel
	run_on_cpu(kvm_apic_deliver_nmi)
else
	apic_local_deliver(APIC_LVT_LINT1)

with kvm_apic_deliver_nmi like this:

if !check_extension(CAP_SET_LINT1)
	get_kernel_apic_state
	if !nmi_acceptable
		return
kvm_vcpu_ioctl(KVM_NMI)

Please don't trust me blindly and re-check, but this is what the scenario
looks like to me.

Thanks for your patience,
Jan
diff mbox

Patch

diff --git a/hw/apic.c b/hw/apic.c
index 69d6ac5..91b82d0 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -205,6 +205,78 @@  void apic_deliver_pic_intr(DeviceState *d, int level)
     }
 }
 
+#ifdef KVM_CAP_IRQCHIP
+static inline uint32_t kapic_reg(struct kvm_lapic_state *kapic, int reg_id);
+
+static void kvm_irqchip_deliver_nmi(void *p)
+{
+    APICState *s = p;
+    struct kvm_lapic_state klapic;
+    uint32_t lvt;
+
+    kvm_get_lapic(s->cpu_env, &klapic);
+    lvt = kapic_reg(&klapic, 0x32 + APIC_LVT_LINT1);
+
+    if (lvt & APIC_LVT_MASKED) {
+        return;
+    }
+
+    if (((lvt >> 8) & 7) != APIC_DM_NMI) {
+        return;
+    }
+
+    kvm_vcpu_ioctl(s->cpu_env, KVM_NMI);
+}
+
+static void __apic_deliver_nmi(APICState *s)
+{
+    if (kvm_irqchip_in_kernel()) {
+        run_on_cpu(s->cpu_env, kvm_irqchip_deliver_nmi, s);
+    } else {
+        apic_local_deliver(s, APIC_LVT_LINT1);
+    }
+}
+#else
+static void __apic_deliver_nmi(APICState *s)
+{
+    apic_local_deliver(s, APIC_LVT_LINT1);
+}
+#endif
+
+enum {
+    KVM_SET_LINT1_UNKNOWN,
+    KVM_SET_LINT1_ENABLED,
+    KVM_SET_LINT1_DISABLED,
+};
+
+static void kvm_set_lint1(void *p)
+{
+    CPUState *env = p;
+
+    kvm_vcpu_ioctl(env, KVM_SET_LINT1);
+}
+
+void apic_deliver_nmi(DeviceState *d)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+    static int kernel_lint1 = KVM_SET_LINT1_UNKNOWN;
+
+    if (kernel_lint1 == KVM_SET_LINT1_UNKNOWN) {
+        if (kvm_enabled() && kvm_irqchip_in_kernel() &&
+            kvm_check_extension(kvm_state, KVM_CAP_SET_LINT1)) {
+            kernel_lint1 = KVM_SET_LINT1_ENABLED;
+        } else {
+            kernel_lint1 = KVM_SET_LINT1_DISABLED;
+        }
+    }
+
+    if (kernel_lint1 == KVM_SET_LINT1_ENABLED) {
+        run_on_cpu(s->cpu_env, kvm_set_lint1, s->cpu_env);
+    } else {
+        __apic_deliver_nmi(s);
+    }
+}
+
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
     int __i, __j, __mask;\
diff --git a/hw/apic.h b/hw/apic.h
index c857d52..3a4be0a 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -10,6 +10,7 @@  void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
                              uint8_t trigger_mode);
 int apic_accept_pic_intr(DeviceState *s);
 void apic_deliver_pic_intr(DeviceState *s, int level);
+void apic_deliver_nmi(DeviceState *d);
 int apic_get_interrupt(DeviceState *s);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);
diff --git a/monitor.c b/monitor.c
index cb485bf..0b81f17 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2616,7 +2616,11 @@  static int do_inject_nmi(Monitor *mon, const QDict *qdict, QObject **ret_data)
     CPUState *env;
 
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        cpu_interrupt(env, CPU_INTERRUPT_NMI);
+        if (!env->apic_state) {
+            cpu_interrupt(env, CPU_INTERRUPT_NMI);
+        } else {
+            apic_deliver_nmi(env->apic_state);
+        }
     }
 
     return 0;