Patchwork [v4,12/15] kvm: x86: Add user space part for in-kernel APIC

login
register
mail settings
Submitter Jan Kiszka
Date Dec. 8, 2011, 11:52 a.m.
Message ID <0ee1c865a905e27e7776e129d21c2ccf411ffd63.1323345150.git.jan.kiszka@siemens.com>
Download mbox | patch
Permalink /patch/130156/
State New
Headers show

Comments

Jan Kiszka - Dec. 8, 2011, 11:52 a.m.
This introduces the alternative APIC backend which makes use of KVM's
in-kernel device model. External NMI injection via LINT1 is emulated by
checking the current state of the in-kernel APIC, only injecting a NMI
into the VCPU if LINT1 is unmasked and configured to DM_NMI.

MSI is not yet supported, so we disable this when the in-kernel model is
in use.

CC: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 Makefile.target   |    2 +-
 hw/kvm/apic.c     |  154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pc.c           |   15 ++++--
 kvm.h             |    3 +
 target-i386/kvm.c |    8 +++
 5 files changed, 176 insertions(+), 6 deletions(-)
 create mode 100644 hw/kvm/apic.c
Blue Swirl - Dec. 8, 2011, 9:16 p.m.
On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> This introduces the alternative APIC backend which makes use of KVM's
> in-kernel device model. External NMI injection via LINT1 is emulated by
> checking the current state of the in-kernel APIC, only injecting a NMI
> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>
> MSI is not yet supported, so we disable this when the in-kernel model is
> in use.
>
> CC: Lai Jiangshan <laijs@cn.fujitsu.com>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  Makefile.target   |    2 +-
>  hw/kvm/apic.c     |  154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/pc.c           |   15 ++++--
>  kvm.h             |    3 +
>  target-i386/kvm.c |    8 +++
>  5 files changed, 176 insertions(+), 6 deletions(-)
>  create mode 100644 hw/kvm/apic.c
>
> diff --git a/Makefile.target b/Makefile.target
> index b549988..76de485 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>  obj-i386-y += debugcon.o multiboot.o
>  obj-i386-y += pc_piix.o
> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>
>  # shared objects
> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
> new file mode 100644
> index 0000000..3924f9e
> --- /dev/null
> +++ b/hw/kvm/apic.c
> @@ -0,0 +1,154 @@
> +/*
> + * KVM in-kernel APIC support
> + *
> + * Copyright (c) 2011 Siemens AG
> + *
> + * Authors:
> + *  Jan Kiszka          <jan.kiszka@siemens.com>
> + *
> + * This work is licensed under the terms of the GNU GPL version 2.
> + * See the COPYING file in the top-level directory.
> + */
> +#include "hw/apic_internal.h"
> +#include "kvm.h"
> +
> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
> +                                   int reg_id, uint32_t val)
> +{
> +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
> +}
> +
> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
> +                                       int reg_id)
> +{
> +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
> +}
> +
> +int kvm_put_apic(CPUState *env)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);

Please pass APICState instead of CPUState.

> +    struct kvm_lapic_state kapic;
> +    int i;
> +
> +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
> +        memset(&kapic, 0, sizeof(kapic));
> +        kvm_apic_set_reg(&kapic, 0x2, s->id << 24);
> +        kvm_apic_set_reg(&kapic, 0x8, s->tpr);
> +        kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24);
> +        kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
> +        kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec);
> +        for (i = 0; i < 8; i++) {
> +            kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]);
> +            kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]);
> +            kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]);
> +        }
> +        kvm_apic_set_reg(&kapic, 0x28, s->esr);
> +        kvm_apic_set_reg(&kapic, 0x30, s->icr[0]);
> +        kvm_apic_set_reg(&kapic, 0x31, s->icr[1]);
> +        for (i = 0; i < APIC_LVT_NB; i++) {
> +            kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]);
> +        }
> +        kvm_apic_set_reg(&kapic, 0x38, s->initial_count);
> +        kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf);
> +
> +        return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
> +    }
> +
> +    return 0;
> +}
> +
> +int kvm_get_apic(CPUState *env)

Same here.

> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
> +    struct kvm_lapic_state kapic;
> +    int ret, i, v;
> +
> +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
> +        ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24;
> +        s->tpr = kvm_apic_get_reg(&kapic, 0x8);
> +        s->arb_id = kvm_apic_get_reg(&kapic, 0x9);
> +        s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24;
> +        s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28;
> +        s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf);
> +        for (i = 0; i < 8; i++) {
> +            s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i);
> +            s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i);
> +            s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i);
> +        }
> +        s->esr = kvm_apic_get_reg(&kapic, 0x28);
> +        s->icr[0] = kvm_apic_get_reg(&kapic, 0x30);
> +        s->icr[1] = kvm_apic_get_reg(&kapic, 0x31);
> +        for (i = 0; i < APIC_LVT_NB; i++) {
> +            s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i);
> +        }
> +        s->initial_count = kvm_apic_get_reg(&kapic, 0x38);
> +        s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e);
> +
> +        v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
> +        s->count_shift = (v + 1) & 7;
> +
> +        s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
> +        apic_next_timer(s, s->initial_count_load_time);
> +    }
> +    return 0;
> +}
> +
> +static void kvm_apic_set_base(APICState *s, uint64_t val)
> +{
> +    s->apicbase = val;
> +}
> +
> +static void kvm_apic_set_tpr(APICState *s, uint8_t val)
> +{
> +    s->tpr = (val & 0x0f) << 4;
> +}
> +
> +static void do_inject_external_nmi(void *data)
> +{
> +    APICState *s = data;
> +    CPUState *env = s->cpu_env;
> +    uint32_t lvt;
> +    int ret;
> +
> +    cpu_synchronize_state(env);
> +
> +    lvt = s->lvt[APIC_LVT_LINT1];
> +    if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) {
> +        ret = kvm_vcpu_ioctl(env, KVM_NMI);
> +        if (ret < 0) {
> +            fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
> +                    strerror(-ret));
> +        }
> +    }
> +}
> +
> +static void kvm_apic_external_nmi(APICState *s)
> +{
> +    run_on_cpu(s->cpu_env, do_inject_external_nmi, s);

Here probably CPUState would make more sense.

> +}
> +
> +static void kvm_apic_backend_init(APICState *s)
> +{
> +    memory_region_init_reservation(&s->io_memory, "kvm-apic-msi",
> +                                   MSI_SPACE_SIZE);
> +}
> +
> +static APICBackend kvm_apic_backend = {
> +    .name = "KVM",
> +    .init = kvm_apic_backend_init,
> +    .set_base = kvm_apic_set_base,
> +    .set_tpr = kvm_apic_set_tpr,
> +    .external_nmi = kvm_apic_external_nmi,
> +};
> +
> +static void kvm_apic_register_backend(void)
> +{
> +    apic_register_backend(&kvm_apic_backend);
> +}
> +
> +device_init(kvm_apic_register_backend)
> diff --git a/hw/pc.c b/hw/pc.c
> index 066edc4..8c8aa49 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -878,27 +878,32 @@ DeviceState *cpu_get_current_apic(void)
>
>  static DeviceState *apic_init(void *env, uint8_t apic_id)
>  {
> +    const char *backend = "QEMU";
>     DeviceState *dev;
> -    SysBusDevice *d;
>     static int apic_mapped;
>
>     dev = qdev_create(NULL, "apic");
>     qdev_prop_set_uint8(dev, "id", apic_id);
>     qdev_prop_set_ptr(dev, "cpu_env", env);
> -    qdev_prop_set_string(dev, "backend", g_strdup("QEMU"));
> +    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
> +        backend = "KVM";
> +    }
> +    qdev_prop_set_string(dev, "backend", g_strdup(backend));
>     qdev_init_nofail(dev);
> -    d = sysbus_from_qdev(dev);
>
>     /* XXX: mapping more APICs at the same memory location */
>     if (apic_mapped == 0) {
>         /* NOTE: the APIC is directly connected to the CPU - it is not
>            on the global memory bus. */
>         /* XXX: what if the base changes? */
> -        sysbus_mmio_map(d, 0, MSI_ADDR_BASE);
> +        sysbus_mmio_map(sysbus_from_qdev(dev), 0, MSI_ADDR_BASE);
>         apic_mapped = 1;
>     }
>
> -    msi_supported = true;
> +    /* KVM does not support MSI yet. */
> +    if (!kvm_enabled() || !kvm_irqchip_in_kernel()) {
> +        msi_supported = true;
> +    }
>
>     return dev;
>  }
> diff --git a/kvm.h b/kvm.h
> index a3c87af..446360a 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -134,6 +134,9 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level);
>  void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin);
>  int kvm_irqchip_commit_routes(KVMState *s);
>
> +int kvm_put_apic(CPUState *env);
> +int kvm_get_apic(CPUState *env);
> +
>  struct kvm_guest_debug;
>  struct kvm_debug_exit_arch;
>
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 9d1191f..956f436 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -1450,6 +1450,10 @@ int kvm_arch_put_registers(CPUState *env, int level)
>         if (ret < 0) {
>             return ret;
>         }
> +        ret = kvm_put_apic(env);
> +        if (ret < 0) {
> +            return ret;
> +        }
>     }
>     ret = kvm_put_vcpu_events(env, level);
>     if (ret < 0) {
> @@ -1497,6 +1501,10 @@ int kvm_arch_get_registers(CPUState *env)
>     if (ret < 0) {
>         return ret;
>     }
> +    ret = kvm_get_apic(env);
> +    if (ret < 0) {
> +        return ret;
> +    }
>     ret = kvm_get_vcpu_events(env);
>     if (ret < 0) {
>         return ret;
> --
> 1.7.3.4
>
Jan Kiszka - Dec. 9, 2011, 7:45 a.m.
On 2011-12-08 22:16, Blue Swirl wrote:
> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> This introduces the alternative APIC backend which makes use of KVM's
>> in-kernel device model. External NMI injection via LINT1 is emulated by
>> checking the current state of the in-kernel APIC, only injecting a NMI
>> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>>
>> MSI is not yet supported, so we disable this when the in-kernel model is
>> in use.
>>
>> CC: Lai Jiangshan <laijs@cn.fujitsu.com>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>  Makefile.target   |    2 +-
>>  hw/kvm/apic.c     |  154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  hw/pc.c           |   15 ++++--
>>  kvm.h             |    3 +
>>  target-i386/kvm.c |    8 +++
>>  5 files changed, 176 insertions(+), 6 deletions(-)
>>  create mode 100644 hw/kvm/apic.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index b549988..76de485 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>>  obj-i386-y += debugcon.o multiboot.o
>>  obj-i386-y += pc_piix.o
>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>>
>>  # shared objects
>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
>> new file mode 100644
>> index 0000000..3924f9e
>> --- /dev/null
>> +++ b/hw/kvm/apic.c
>> @@ -0,0 +1,154 @@
>> +/*
>> + * KVM in-kernel APIC support
>> + *
>> + * Copyright (c) 2011 Siemens AG
>> + *
>> + * Authors:
>> + *  Jan Kiszka          <jan.kiszka@siemens.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL version 2.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +#include "hw/apic_internal.h"
>> +#include "kvm.h"
>> +
>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
>> +                                   int reg_id, uint32_t val)
>> +{
>> +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
>> +}
>> +
>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
>> +                                       int reg_id)
>> +{
>> +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
>> +}
>> +
>> +int kvm_put_apic(CPUState *env)
>> +{
>> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
> 
> Please pass APICState instead of CPUState.

DeviceState, I suppose. Yes, makes more sense, update will follow.

> 
>> +    struct kvm_lapic_state kapic;
>> +    int i;
>> +
>> +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
>> +        memset(&kapic, 0, sizeof(kapic));
>> +        kvm_apic_set_reg(&kapic, 0x2, s->id << 24);
>> +        kvm_apic_set_reg(&kapic, 0x8, s->tpr);
>> +        kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24);
>> +        kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
>> +        kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec);
>> +        for (i = 0; i < 8; i++) {
>> +            kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]);
>> +            kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]);
>> +            kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]);
>> +        }
>> +        kvm_apic_set_reg(&kapic, 0x28, s->esr);
>> +        kvm_apic_set_reg(&kapic, 0x30, s->icr[0]);
>> +        kvm_apic_set_reg(&kapic, 0x31, s->icr[1]);
>> +        for (i = 0; i < APIC_LVT_NB; i++) {
>> +            kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]);
>> +        }
>> +        kvm_apic_set_reg(&kapic, 0x38, s->initial_count);
>> +        kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf);
>> +
>> +        return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +int kvm_get_apic(CPUState *env)
> 
> Same here.
> 
>> +{
>> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
>> +    struct kvm_lapic_state kapic;
>> +    int ret, i, v;
>> +
>> +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
>> +        ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
>> +
>> +        s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24;
>> +        s->tpr = kvm_apic_get_reg(&kapic, 0x8);
>> +        s->arb_id = kvm_apic_get_reg(&kapic, 0x9);
>> +        s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24;
>> +        s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28;
>> +        s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf);
>> +        for (i = 0; i < 8; i++) {
>> +            s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i);
>> +            s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i);
>> +            s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i);
>> +        }
>> +        s->esr = kvm_apic_get_reg(&kapic, 0x28);
>> +        s->icr[0] = kvm_apic_get_reg(&kapic, 0x30);
>> +        s->icr[1] = kvm_apic_get_reg(&kapic, 0x31);
>> +        for (i = 0; i < APIC_LVT_NB; i++) {
>> +            s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i);
>> +        }
>> +        s->initial_count = kvm_apic_get_reg(&kapic, 0x38);
>> +        s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e);
>> +
>> +        v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
>> +        s->count_shift = (v + 1) & 7;
>> +
>> +        s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
>> +        apic_next_timer(s, s->initial_count_load_time);
>> +    }
>> +    return 0;
>> +}
>> +
>> +static void kvm_apic_set_base(APICState *s, uint64_t val)
>> +{
>> +    s->apicbase = val;
>> +}
>> +
>> +static void kvm_apic_set_tpr(APICState *s, uint8_t val)
>> +{
>> +    s->tpr = (val & 0x0f) << 4;
>> +}
>> +
>> +static void do_inject_external_nmi(void *data)
>> +{
>> +    APICState *s = data;
>> +    CPUState *env = s->cpu_env;
>> +    uint32_t lvt;
>> +    int ret;
>> +
>> +    cpu_synchronize_state(env);
>> +
>> +    lvt = s->lvt[APIC_LVT_LINT1];
>> +    if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) {
>> +        ret = kvm_vcpu_ioctl(env, KVM_NMI);
>> +        if (ret < 0) {
>> +            fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
>> +                    strerror(-ret));
>> +        }
>> +    }
>> +}
>> +
>> +static void kvm_apic_external_nmi(APICState *s)
>> +{
>> +    run_on_cpu(s->cpu_env, do_inject_external_nmi, s);
> 
> Here probably CPUState would make more sense.

But here I think it's more consistent to stay with APICState (for the
hook) and DeviceState (for apic_deliver_nmi).

Jan
Jan Kiszka - Dec. 9, 2011, 7:52 a.m.
On 2011-12-09 08:45, Jan Kiszka wrote:
> On 2011-12-08 22:16, Blue Swirl wrote:
>> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>> This introduces the alternative APIC backend which makes use of KVM's
>>> in-kernel device model. External NMI injection via LINT1 is emulated by
>>> checking the current state of the in-kernel APIC, only injecting a NMI
>>> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>>>
>>> MSI is not yet supported, so we disable this when the in-kernel model is
>>> in use.
>>>
>>> CC: Lai Jiangshan <laijs@cn.fujitsu.com>
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>  Makefile.target   |    2 +-
>>>  hw/kvm/apic.c     |  154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  hw/pc.c           |   15 ++++--
>>>  kvm.h             |    3 +
>>>  target-i386/kvm.c |    8 +++
>>>  5 files changed, 176 insertions(+), 6 deletions(-)
>>>  create mode 100644 hw/kvm/apic.c
>>>
>>> diff --git a/Makefile.target b/Makefile.target
>>> index b549988..76de485 100644
>>> --- a/Makefile.target
>>> +++ b/Makefile.target
>>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>>>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>>>  obj-i386-y += debugcon.o multiboot.o
>>>  obj-i386-y += pc_piix.o
>>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
>>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>>>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>>>
>>>  # shared objects
>>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
>>> new file mode 100644
>>> index 0000000..3924f9e
>>> --- /dev/null
>>> +++ b/hw/kvm/apic.c
>>> @@ -0,0 +1,154 @@
>>> +/*
>>> + * KVM in-kernel APIC support
>>> + *
>>> + * Copyright (c) 2011 Siemens AG
>>> + *
>>> + * Authors:
>>> + *  Jan Kiszka          <jan.kiszka@siemens.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL version 2.
>>> + * See the COPYING file in the top-level directory.
>>> + */
>>> +#include "hw/apic_internal.h"
>>> +#include "kvm.h"
>>> +
>>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
>>> +                                   int reg_id, uint32_t val)
>>> +{
>>> +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
>>> +}
>>> +
>>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
>>> +                                       int reg_id)
>>> +{
>>> +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
>>> +}
>>> +
>>> +int kvm_put_apic(CPUState *env)
>>> +{
>>> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
>>
>> Please pass APICState instead of CPUState.
> 
> DeviceState, I suppose. Yes, makes more sense, update will follow.

On second look: no, I'll keep it as is. All kvm_get/put_* helpers have
this kind of signature, i.e. are working against env. kvm_get/put_apic
just happens to be implemented outside of target-i386/kvm.c. And they
require both APIC and CPUState anyway, so it makes no difference.

Jan
Blue Swirl - Dec. 10, 2011, 3:40 p.m.
On Fri, Dec 9, 2011 at 07:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> On 2011-12-09 08:45, Jan Kiszka wrote:
>> On 2011-12-08 22:16, Blue Swirl wrote:
>>> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>>> This introduces the alternative APIC backend which makes use of KVM's
>>>> in-kernel device model. External NMI injection via LINT1 is emulated by
>>>> checking the current state of the in-kernel APIC, only injecting a NMI
>>>> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>>>>
>>>> MSI is not yet supported, so we disable this when the in-kernel model is
>>>> in use.
>>>>
>>>> CC: Lai Jiangshan <laijs@cn.fujitsu.com>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>  Makefile.target   |    2 +-
>>>>  hw/kvm/apic.c     |  154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  hw/pc.c           |   15 ++++--
>>>>  kvm.h             |    3 +
>>>>  target-i386/kvm.c |    8 +++
>>>>  5 files changed, 176 insertions(+), 6 deletions(-)
>>>>  create mode 100644 hw/kvm/apic.c
>>>>
>>>> diff --git a/Makefile.target b/Makefile.target
>>>> index b549988..76de485 100644
>>>> --- a/Makefile.target
>>>> +++ b/Makefile.target
>>>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>>>>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>>>>  obj-i386-y += debugcon.o multiboot.o
>>>>  obj-i386-y += pc_piix.o
>>>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
>>>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>>>>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>>>>
>>>>  # shared objects
>>>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
>>>> new file mode 100644
>>>> index 0000000..3924f9e
>>>> --- /dev/null
>>>> +++ b/hw/kvm/apic.c
>>>> @@ -0,0 +1,154 @@
>>>> +/*
>>>> + * KVM in-kernel APIC support
>>>> + *
>>>> + * Copyright (c) 2011 Siemens AG
>>>> + *
>>>> + * Authors:
>>>> + *  Jan Kiszka          <jan.kiszka@siemens.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL version 2.
>>>> + * See the COPYING file in the top-level directory.
>>>> + */
>>>> +#include "hw/apic_internal.h"
>>>> +#include "kvm.h"
>>>> +
>>>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
>>>> +                                   int reg_id, uint32_t val)
>>>> +{
>>>> +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
>>>> +}
>>>> +
>>>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
>>>> +                                       int reg_id)
>>>> +{
>>>> +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
>>>> +}
>>>> +
>>>> +int kvm_put_apic(CPUState *env)
>>>> +{
>>>> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
>>>
>>> Please pass APICState instead of CPUState.
>>
>> DeviceState, I suppose. Yes, makes more sense, update will follow.
>
> On second look: no, I'll keep it as is. All kvm_get/put_* helpers have
> this kind of signature, i.e. are working against env.

There's kvm_get_supported_msrs for example.

> kvm_get/put_apic
> just happens to be implemented outside of target-i386/kvm.c. And they
> require both APIC and CPUState anyway, so it makes no difference.

It does, passing CPUState violates layering. Please split the
functions so that the ioctl calls which need CPUState go to kvm.c. For
example, the functions in kvm/apic.c could just perform copying from
kvm_lapic_state fields to APICstate fields and vice versa.

The KVM interface by the way does not look so clever. Why isn't there
just an array of 32 bit fields so the casts can be avoided? Perhaps
APICState should be (later) changed to match KVM version so that the
structure can be passed directly without copying.
Jan Kiszka - Dec. 10, 2011, 3:58 p.m.
On 2011-12-10 16:40, Blue Swirl wrote:
> On Fri, Dec 9, 2011 at 07:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> On 2011-12-09 08:45, Jan Kiszka wrote:
>>> On 2011-12-08 22:16, Blue Swirl wrote:
>>>> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>>>> This introduces the alternative APIC backend which makes use of KVM's
>>>>> in-kernel device model. External NMI injection via LINT1 is emulated by
>>>>> checking the current state of the in-kernel APIC, only injecting a NMI
>>>>> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>>>>>
>>>>> MSI is not yet supported, so we disable this when the in-kernel model is
>>>>> in use.
>>>>>
>>>>> CC: Lai Jiangshan <laijs@cn.fujitsu.com>
>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>> ---
>>>>>  Makefile.target   |    2 +-
>>>>>  hw/kvm/apic.c     |  154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>  hw/pc.c           |   15 ++++--
>>>>>  kvm.h             |    3 +
>>>>>  target-i386/kvm.c |    8 +++
>>>>>  5 files changed, 176 insertions(+), 6 deletions(-)
>>>>>  create mode 100644 hw/kvm/apic.c
>>>>>
>>>>> diff --git a/Makefile.target b/Makefile.target
>>>>> index b549988..76de485 100644
>>>>> --- a/Makefile.target
>>>>> +++ b/Makefile.target
>>>>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>>>>>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>>>>>  obj-i386-y += debugcon.o multiboot.o
>>>>>  obj-i386-y += pc_piix.o
>>>>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
>>>>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>>>>>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>>>>>
>>>>>  # shared objects
>>>>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
>>>>> new file mode 100644
>>>>> index 0000000..3924f9e
>>>>> --- /dev/null
>>>>> +++ b/hw/kvm/apic.c
>>>>> @@ -0,0 +1,154 @@
>>>>> +/*
>>>>> + * KVM in-kernel APIC support
>>>>> + *
>>>>> + * Copyright (c) 2011 Siemens AG
>>>>> + *
>>>>> + * Authors:
>>>>> + *  Jan Kiszka          <jan.kiszka@siemens.com>
>>>>> + *
>>>>> + * This work is licensed under the terms of the GNU GPL version 2.
>>>>> + * See the COPYING file in the top-level directory.
>>>>> + */
>>>>> +#include "hw/apic_internal.h"
>>>>> +#include "kvm.h"
>>>>> +
>>>>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
>>>>> +                                   int reg_id, uint32_t val)
>>>>> +{
>>>>> +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
>>>>> +}
>>>>> +
>>>>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
>>>>> +                                       int reg_id)
>>>>> +{
>>>>> +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
>>>>> +}
>>>>> +
>>>>> +int kvm_put_apic(CPUState *env)
>>>>> +{
>>>>> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
>>>>
>>>> Please pass APICState instead of CPUState.
>>>
>>> DeviceState, I suppose. Yes, makes more sense, update will follow.
>>
>> On second look: no, I'll keep it as is. All kvm_get/put_* helpers have
>> this kind of signature, i.e. are working against env.
> 
> There's kvm_get_supported_msrs for example.
> 
>> kvm_get/put_apic
>> just happens to be implemented outside of target-i386/kvm.c. And they
>> require both APIC and CPUState anyway, so it makes no difference.
> 
> It does, passing CPUState violates layering. Please split the
> functions so that the ioctl calls which need CPUState go to kvm.c. For
> example, the functions in kvm/apic.c could just perform copying from
> kvm_lapic_state fields to APICstate fields and vice versa.

That's a good idea.

> 
> The KVM interface by the way does not look so clever. Why isn't there
> just an array of 32 bit fields so the casts can be avoided? Perhaps
> APICState should be (later) changed to match KVM version so that the
> structure can be passed directly without copying.

Wouldn't that complicate the use in the user space model again? At least
for registers that are used with both backends.

Jan
Blue Swirl - Dec. 10, 2011, 4:11 p.m.
On Sat, Dec 10, 2011 at 15:58, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2011-12-10 16:40, Blue Swirl wrote:
>> On Fri, Dec 9, 2011 at 07:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>> On 2011-12-09 08:45, Jan Kiszka wrote:
>>>> On 2011-12-08 22:16, Blue Swirl wrote:
>>>>> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>>>>> This introduces the alternative APIC backend which makes use of KVM's
>>>>>> in-kernel device model. External NMI injection via LINT1 is emulated by
>>>>>> checking the current state of the in-kernel APIC, only injecting a NMI
>>>>>> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>>>>>>
>>>>>> MSI is not yet supported, so we disable this when the in-kernel model is
>>>>>> in use.
>>>>>>
>>>>>> CC: Lai Jiangshan <laijs@cn.fujitsu.com>
>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>> ---
>>>>>>  Makefile.target   |    2 +-
>>>>>>  hw/kvm/apic.c     |  154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  hw/pc.c           |   15 ++++--
>>>>>>  kvm.h             |    3 +
>>>>>>  target-i386/kvm.c |    8 +++
>>>>>>  5 files changed, 176 insertions(+), 6 deletions(-)
>>>>>>  create mode 100644 hw/kvm/apic.c
>>>>>>
>>>>>> diff --git a/Makefile.target b/Makefile.target
>>>>>> index b549988..76de485 100644
>>>>>> --- a/Makefile.target
>>>>>> +++ b/Makefile.target
>>>>>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>>>>>>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>>>>>>  obj-i386-y += debugcon.o multiboot.o
>>>>>>  obj-i386-y += pc_piix.o
>>>>>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
>>>>>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>>>>>>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>>>>>>
>>>>>>  # shared objects
>>>>>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
>>>>>> new file mode 100644
>>>>>> index 0000000..3924f9e
>>>>>> --- /dev/null
>>>>>> +++ b/hw/kvm/apic.c
>>>>>> @@ -0,0 +1,154 @@
>>>>>> +/*
>>>>>> + * KVM in-kernel APIC support
>>>>>> + *
>>>>>> + * Copyright (c) 2011 Siemens AG
>>>>>> + *
>>>>>> + * Authors:
>>>>>> + *  Jan Kiszka          <jan.kiszka@siemens.com>
>>>>>> + *
>>>>>> + * This work is licensed under the terms of the GNU GPL version 2.
>>>>>> + * See the COPYING file in the top-level directory.
>>>>>> + */
>>>>>> +#include "hw/apic_internal.h"
>>>>>> +#include "kvm.h"
>>>>>> +
>>>>>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
>>>>>> +                                   int reg_id, uint32_t val)
>>>>>> +{
>>>>>> +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
>>>>>> +}
>>>>>> +
>>>>>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
>>>>>> +                                       int reg_id)
>>>>>> +{
>>>>>> +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
>>>>>> +}
>>>>>> +
>>>>>> +int kvm_put_apic(CPUState *env)
>>>>>> +{
>>>>>> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
>>>>>
>>>>> Please pass APICState instead of CPUState.
>>>>
>>>> DeviceState, I suppose. Yes, makes more sense, update will follow.
>>>
>>> On second look: no, I'll keep it as is. All kvm_get/put_* helpers have
>>> this kind of signature, i.e. are working against env.
>>
>> There's kvm_get_supported_msrs for example.
>>
>>> kvm_get/put_apic
>>> just happens to be implemented outside of target-i386/kvm.c. And they
>>> require both APIC and CPUState anyway, so it makes no difference.
>>
>> It does, passing CPUState violates layering. Please split the
>> functions so that the ioctl calls which need CPUState go to kvm.c. For
>> example, the functions in kvm/apic.c could just perform copying from
>> kvm_lapic_state fields to APICstate fields and vice versa.
>
> That's a good idea.
>
>>
>> The KVM interface by the way does not look so clever. Why isn't there
>> just an array of 32 bit fields so the casts can be avoided? Perhaps
>> APICState should be (later) changed to match KVM version so that the
>> structure can be passed directly without copying.
>
> Wouldn't that complicate the use in the user space model again? At least
> for registers that are used with both backends.

Well, we have (at least) two styles how to model devices.

In the first one, the device state structure contains an array of
registers, so the functions which use them may need for example to
perform some bit field extraction to get what they need.

In the model used by APIC and others, the structure contains cooked
values, for example divide_count and count_shift in APICState. This
means that the CPU accesses get slightly slower since the fields need
to be packed and unpacked but the other functions may be faster.

Which one is better depends on frequency and importance of register
accesses by CPU vs. other accesses. But it shouldn't complicate that
much either way. Actually design choices like this may have been taken
without too much consideration.

Alternatively, KVM interface could be changed to take QEMU structure
directly, but I don't suppose that would be a good idea. It would be
easier for everyone if QEMU changed instead.

Patch

diff --git a/Makefile.target b/Makefile.target
index b549988..76de485 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -236,7 +236,7 @@  obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
 obj-i386-y += pc_piix.o
-obj-i386-$(CONFIG_KVM) += kvm/clock.o
+obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
new file mode 100644
index 0000000..3924f9e
--- /dev/null
+++ b/hw/kvm/apic.c
@@ -0,0 +1,154 @@ 
+/*
+ * KVM in-kernel APIC support
+ *
+ * Copyright (c) 2011 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka          <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ */
+#include "hw/apic_internal.h"
+#include "kvm.h"
+
+static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
+                                   int reg_id, uint32_t val)
+{
+    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
+}
+
+static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
+                                       int reg_id)
+{
+    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
+}
+
+int kvm_put_apic(CPUState *env)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
+    struct kvm_lapic_state kapic;
+    int i;
+
+    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
+        memset(&kapic, 0, sizeof(kapic));
+        kvm_apic_set_reg(&kapic, 0x2, s->id << 24);
+        kvm_apic_set_reg(&kapic, 0x8, s->tpr);
+        kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24);
+        kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
+        kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec);
+        for (i = 0; i < 8; i++) {
+            kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]);
+            kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]);
+            kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]);
+        }
+        kvm_apic_set_reg(&kapic, 0x28, s->esr);
+        kvm_apic_set_reg(&kapic, 0x30, s->icr[0]);
+        kvm_apic_set_reg(&kapic, 0x31, s->icr[1]);
+        for (i = 0; i < APIC_LVT_NB; i++) {
+            kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]);
+        }
+        kvm_apic_set_reg(&kapic, 0x38, s->initial_count);
+        kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf);
+
+        return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
+    }
+
+    return 0;
+}
+
+int kvm_get_apic(CPUState *env)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
+    struct kvm_lapic_state kapic;
+    int ret, i, v;
+
+    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
+        ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
+        if (ret < 0) {
+            return ret;
+        }
+
+        s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24;
+        s->tpr = kvm_apic_get_reg(&kapic, 0x8);
+        s->arb_id = kvm_apic_get_reg(&kapic, 0x9);
+        s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24;
+        s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28;
+        s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf);
+        for (i = 0; i < 8; i++) {
+            s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i);
+            s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i);
+            s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i);
+        }
+        s->esr = kvm_apic_get_reg(&kapic, 0x28);
+        s->icr[0] = kvm_apic_get_reg(&kapic, 0x30);
+        s->icr[1] = kvm_apic_get_reg(&kapic, 0x31);
+        for (i = 0; i < APIC_LVT_NB; i++) {
+            s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i);
+        }
+        s->initial_count = kvm_apic_get_reg(&kapic, 0x38);
+        s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e);
+
+        v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
+        s->count_shift = (v + 1) & 7;
+
+        s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
+        apic_next_timer(s, s->initial_count_load_time);
+    }
+    return 0;
+}
+
+static void kvm_apic_set_base(APICState *s, uint64_t val)
+{
+    s->apicbase = val;
+}
+
+static void kvm_apic_set_tpr(APICState *s, uint8_t val)
+{
+    s->tpr = (val & 0x0f) << 4;
+}
+
+static void do_inject_external_nmi(void *data)
+{
+    APICState *s = data;
+    CPUState *env = s->cpu_env;
+    uint32_t lvt;
+    int ret;
+
+    cpu_synchronize_state(env);
+
+    lvt = s->lvt[APIC_LVT_LINT1];
+    if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) {
+        ret = kvm_vcpu_ioctl(env, KVM_NMI);
+        if (ret < 0) {
+            fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
+                    strerror(-ret));
+        }
+    }
+}
+
+static void kvm_apic_external_nmi(APICState *s)
+{
+    run_on_cpu(s->cpu_env, do_inject_external_nmi, s);
+}
+
+static void kvm_apic_backend_init(APICState *s)
+{
+    memory_region_init_reservation(&s->io_memory, "kvm-apic-msi",
+                                   MSI_SPACE_SIZE);
+}
+
+static APICBackend kvm_apic_backend = {
+    .name = "KVM",
+    .init = kvm_apic_backend_init,
+    .set_base = kvm_apic_set_base,
+    .set_tpr = kvm_apic_set_tpr,
+    .external_nmi = kvm_apic_external_nmi,
+};
+
+static void kvm_apic_register_backend(void)
+{
+    apic_register_backend(&kvm_apic_backend);
+}
+
+device_init(kvm_apic_register_backend)
diff --git a/hw/pc.c b/hw/pc.c
index 066edc4..8c8aa49 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -878,27 +878,32 @@  DeviceState *cpu_get_current_apic(void)
 
 static DeviceState *apic_init(void *env, uint8_t apic_id)
 {
+    const char *backend = "QEMU";
     DeviceState *dev;
-    SysBusDevice *d;
     static int apic_mapped;
 
     dev = qdev_create(NULL, "apic");
     qdev_prop_set_uint8(dev, "id", apic_id);
     qdev_prop_set_ptr(dev, "cpu_env", env);
-    qdev_prop_set_string(dev, "backend", g_strdup("QEMU"));
+    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+        backend = "KVM";
+    }
+    qdev_prop_set_string(dev, "backend", g_strdup(backend));
     qdev_init_nofail(dev);
-    d = sysbus_from_qdev(dev);
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_mapped == 0) {
         /* NOTE: the APIC is directly connected to the CPU - it is not
            on the global memory bus. */
         /* XXX: what if the base changes? */
-        sysbus_mmio_map(d, 0, MSI_ADDR_BASE);
+        sysbus_mmio_map(sysbus_from_qdev(dev), 0, MSI_ADDR_BASE);
         apic_mapped = 1;
     }
 
-    msi_supported = true;
+    /* KVM does not support MSI yet. */
+    if (!kvm_enabled() || !kvm_irqchip_in_kernel()) {
+        msi_supported = true;
+    }
 
     return dev;
 }
diff --git a/kvm.h b/kvm.h
index a3c87af..446360a 100644
--- a/kvm.h
+++ b/kvm.h
@@ -134,6 +134,9 @@  int kvm_irqchip_set_irq(KVMState *s, int irq, int level);
 void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin);
 int kvm_irqchip_commit_routes(KVMState *s);
 
+int kvm_put_apic(CPUState *env);
+int kvm_get_apic(CPUState *env);
+
 struct kvm_guest_debug;
 struct kvm_debug_exit_arch;
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 9d1191f..956f436 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1450,6 +1450,10 @@  int kvm_arch_put_registers(CPUState *env, int level)
         if (ret < 0) {
             return ret;
         }
+        ret = kvm_put_apic(env);
+        if (ret < 0) {
+            return ret;
+        }
     }
     ret = kvm_put_vcpu_events(env, level);
     if (ret < 0) {
@@ -1497,6 +1501,10 @@  int kvm_arch_get_registers(CPUState *env)
     if (ret < 0) {
         return ret;
     }
+    ret = kvm_get_apic(env);
+    if (ret < 0) {
+        return ret;
+    }
     ret = kvm_get_vcpu_events(env);
     if (ret < 0) {
         return ret;