Message ID | 0ee1c865a905e27e7776e129d21c2ccf411ffd63.1323345150.git.jan.kiszka@siemens.com |
---|---|
State | New |
Headers | show |
On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: > This introduces the alternative APIC backend which makes use of KVM's > in-kernel device model. External NMI injection via LINT1 is emulated by > checking the current state of the in-kernel APIC, only injecting a NMI > into the VCPU if LINT1 is unmasked and configured to DM_NMI. > > MSI is not yet supported, so we disable this when the in-kernel model is > in use. > > CC: Lai Jiangshan <laijs@cn.fujitsu.com> > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > Makefile.target | 2 +- > hw/kvm/apic.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/pc.c | 15 ++++-- > kvm.h | 3 + > target-i386/kvm.c | 8 +++ > 5 files changed, 176 insertions(+), 6 deletions(-) > create mode 100644 hw/kvm/apic.c > > diff --git a/Makefile.target b/Makefile.target > index b549988..76de485 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -236,7 +236,7 @@ obj-i386-y += vmport.o > obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o > obj-i386-y += debugcon.o multiboot.o > obj-i386-y += pc_piix.o > -obj-i386-$(CONFIG_KVM) += kvm/clock.o > +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o > obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o > > # shared objects > diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c > new file mode 100644 > index 0000000..3924f9e > --- /dev/null > +++ b/hw/kvm/apic.c > @@ -0,0 +1,154 @@ > +/* > + * KVM in-kernel APIC support > + * > + * Copyright (c) 2011 Siemens AG > + * > + * Authors: > + * Jan Kiszka <jan.kiszka@siemens.com> > + * > + * This work is licensed under the terms of the GNU GPL version 2. > + * See the COPYING file in the top-level directory. > + */ > +#include "hw/apic_internal.h" > +#include "kvm.h" > + > +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, > + int reg_id, uint32_t val) > +{ > + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; > +} > + > +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, > + int reg_id) > +{ > + return *((uint32_t *)(kapic->regs + (reg_id << 4))); > +} > + > +int kvm_put_apic(CPUState *env) > +{ > + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); Please pass APICState instead of CPUState. > + struct kvm_lapic_state kapic; > + int i; > + > + if (s && kvm_enabled() && kvm_irqchip_in_kernel()) { > + memset(&kapic, 0, sizeof(kapic)); > + kvm_apic_set_reg(&kapic, 0x2, s->id << 24); > + kvm_apic_set_reg(&kapic, 0x8, s->tpr); > + kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24); > + kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); > + kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec); > + for (i = 0; i < 8; i++) { > + kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]); > + kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]); > + kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]); > + } > + kvm_apic_set_reg(&kapic, 0x28, s->esr); > + kvm_apic_set_reg(&kapic, 0x30, s->icr[0]); > + kvm_apic_set_reg(&kapic, 0x31, s->icr[1]); > + for (i = 0; i < APIC_LVT_NB; i++) { > + kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]); > + } > + kvm_apic_set_reg(&kapic, 0x38, s->initial_count); > + kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf); > + > + return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic); > + } > + > + return 0; > +} > + > +int kvm_get_apic(CPUState *env) Same here. > +{ > + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); > + struct kvm_lapic_state kapic; > + int ret, i, v; > + > + if (s && kvm_enabled() && kvm_irqchip_in_kernel()) { > + ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic); > + if (ret < 0) { > + return ret; > + } > + > + s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24; > + s->tpr = kvm_apic_get_reg(&kapic, 0x8); > + s->arb_id = kvm_apic_get_reg(&kapic, 0x9); > + s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24; > + s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28; > + s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf); > + for (i = 0; i < 8; i++) { > + s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i); > + s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i); > + s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i); > + } > + s->esr = kvm_apic_get_reg(&kapic, 0x28); > + s->icr[0] = kvm_apic_get_reg(&kapic, 0x30); > + s->icr[1] = kvm_apic_get_reg(&kapic, 0x31); > + for (i = 0; i < APIC_LVT_NB; i++) { > + s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i); > + } > + s->initial_count = kvm_apic_get_reg(&kapic, 0x38); > + s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e); > + > + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); > + s->count_shift = (v + 1) & 7; > + > + s->initial_count_load_time = qemu_get_clock_ns(vm_clock); > + apic_next_timer(s, s->initial_count_load_time); > + } > + return 0; > +} > + > +static void kvm_apic_set_base(APICState *s, uint64_t val) > +{ > + s->apicbase = val; > +} > + > +static void kvm_apic_set_tpr(APICState *s, uint8_t val) > +{ > + s->tpr = (val & 0x0f) << 4; > +} > + > +static void do_inject_external_nmi(void *data) > +{ > + APICState *s = data; > + CPUState *env = s->cpu_env; > + uint32_t lvt; > + int ret; > + > + cpu_synchronize_state(env); > + > + lvt = s->lvt[APIC_LVT_LINT1]; > + if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) { > + ret = kvm_vcpu_ioctl(env, KVM_NMI); > + if (ret < 0) { > + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", > + strerror(-ret)); > + } > + } > +} > + > +static void kvm_apic_external_nmi(APICState *s) > +{ > + run_on_cpu(s->cpu_env, do_inject_external_nmi, s); Here probably CPUState would make more sense. > +} > + > +static void kvm_apic_backend_init(APICState *s) > +{ > + memory_region_init_reservation(&s->io_memory, "kvm-apic-msi", > + MSI_SPACE_SIZE); > +} > + > +static APICBackend kvm_apic_backend = { > + .name = "KVM", > + .init = kvm_apic_backend_init, > + .set_base = kvm_apic_set_base, > + .set_tpr = kvm_apic_set_tpr, > + .external_nmi = kvm_apic_external_nmi, > +}; > + > +static void kvm_apic_register_backend(void) > +{ > + apic_register_backend(&kvm_apic_backend); > +} > + > +device_init(kvm_apic_register_backend) > diff --git a/hw/pc.c b/hw/pc.c > index 066edc4..8c8aa49 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -878,27 +878,32 @@ DeviceState *cpu_get_current_apic(void) > > static DeviceState *apic_init(void *env, uint8_t apic_id) > { > + const char *backend = "QEMU"; > DeviceState *dev; > - SysBusDevice *d; > static int apic_mapped; > > dev = qdev_create(NULL, "apic"); > qdev_prop_set_uint8(dev, "id", apic_id); > qdev_prop_set_ptr(dev, "cpu_env", env); > - qdev_prop_set_string(dev, "backend", g_strdup("QEMU")); > + if (kvm_enabled() && kvm_irqchip_in_kernel()) { > + backend = "KVM"; > + } > + qdev_prop_set_string(dev, "backend", g_strdup(backend)); > qdev_init_nofail(dev); > - d = sysbus_from_qdev(dev); > > /* XXX: mapping more APICs at the same memory location */ > if (apic_mapped == 0) { > /* NOTE: the APIC is directly connected to the CPU - it is not > on the global memory bus. */ > /* XXX: what if the base changes? */ > - sysbus_mmio_map(d, 0, MSI_ADDR_BASE); > + sysbus_mmio_map(sysbus_from_qdev(dev), 0, MSI_ADDR_BASE); > apic_mapped = 1; > } > > - msi_supported = true; > + /* KVM does not support MSI yet. */ > + if (!kvm_enabled() || !kvm_irqchip_in_kernel()) { > + msi_supported = true; > + } > > return dev; > } > diff --git a/kvm.h b/kvm.h > index a3c87af..446360a 100644 > --- a/kvm.h > +++ b/kvm.h > @@ -134,6 +134,9 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level); > void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); > int kvm_irqchip_commit_routes(KVMState *s); > > +int kvm_put_apic(CPUState *env); > +int kvm_get_apic(CPUState *env); > + > struct kvm_guest_debug; > struct kvm_debug_exit_arch; > > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index 9d1191f..956f436 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -1450,6 +1450,10 @@ int kvm_arch_put_registers(CPUState *env, int level) > if (ret < 0) { > return ret; > } > + ret = kvm_put_apic(env); > + if (ret < 0) { > + return ret; > + } > } > ret = kvm_put_vcpu_events(env, level); > if (ret < 0) { > @@ -1497,6 +1501,10 @@ int kvm_arch_get_registers(CPUState *env) > if (ret < 0) { > return ret; > } > + ret = kvm_get_apic(env); > + if (ret < 0) { > + return ret; > + } > ret = kvm_get_vcpu_events(env); > if (ret < 0) { > return ret; > -- > 1.7.3.4 >
On 2011-12-08 22:16, Blue Swirl wrote: > On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: >> This introduces the alternative APIC backend which makes use of KVM's >> in-kernel device model. External NMI injection via LINT1 is emulated by >> checking the current state of the in-kernel APIC, only injecting a NMI >> into the VCPU if LINT1 is unmasked and configured to DM_NMI. >> >> MSI is not yet supported, so we disable this when the in-kernel model is >> in use. >> >> CC: Lai Jiangshan <laijs@cn.fujitsu.com> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >> --- >> Makefile.target | 2 +- >> hw/kvm/apic.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/pc.c | 15 ++++-- >> kvm.h | 3 + >> target-i386/kvm.c | 8 +++ >> 5 files changed, 176 insertions(+), 6 deletions(-) >> create mode 100644 hw/kvm/apic.c >> >> diff --git a/Makefile.target b/Makefile.target >> index b549988..76de485 100644 >> --- a/Makefile.target >> +++ b/Makefile.target >> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o >> obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o >> obj-i386-y += debugcon.o multiboot.o >> obj-i386-y += pc_piix.o >> -obj-i386-$(CONFIG_KVM) += kvm/clock.o >> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o >> obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o >> >> # shared objects >> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c >> new file mode 100644 >> index 0000000..3924f9e >> --- /dev/null >> +++ b/hw/kvm/apic.c >> @@ -0,0 +1,154 @@ >> +/* >> + * KVM in-kernel APIC support >> + * >> + * Copyright (c) 2011 Siemens AG >> + * >> + * Authors: >> + * Jan Kiszka <jan.kiszka@siemens.com> >> + * >> + * This work is licensed under the terms of the GNU GPL version 2. >> + * See the COPYING file in the top-level directory. >> + */ >> +#include "hw/apic_internal.h" >> +#include "kvm.h" >> + >> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, >> + int reg_id, uint32_t val) >> +{ >> + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; >> +} >> + >> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, >> + int reg_id) >> +{ >> + return *((uint32_t *)(kapic->regs + (reg_id << 4))); >> +} >> + >> +int kvm_put_apic(CPUState *env) >> +{ >> + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); > > Please pass APICState instead of CPUState. DeviceState, I suppose. Yes, makes more sense, update will follow. > >> + struct kvm_lapic_state kapic; >> + int i; >> + >> + if (s && kvm_enabled() && kvm_irqchip_in_kernel()) { >> + memset(&kapic, 0, sizeof(kapic)); >> + kvm_apic_set_reg(&kapic, 0x2, s->id << 24); >> + kvm_apic_set_reg(&kapic, 0x8, s->tpr); >> + kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24); >> + kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); >> + kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec); >> + for (i = 0; i < 8; i++) { >> + kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]); >> + kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]); >> + kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]); >> + } >> + kvm_apic_set_reg(&kapic, 0x28, s->esr); >> + kvm_apic_set_reg(&kapic, 0x30, s->icr[0]); >> + kvm_apic_set_reg(&kapic, 0x31, s->icr[1]); >> + for (i = 0; i < APIC_LVT_NB; i++) { >> + kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]); >> + } >> + kvm_apic_set_reg(&kapic, 0x38, s->initial_count); >> + kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf); >> + >> + return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic); >> + } >> + >> + return 0; >> +} >> + >> +int kvm_get_apic(CPUState *env) > > Same here. > >> +{ >> + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); >> + struct kvm_lapic_state kapic; >> + int ret, i, v; >> + >> + if (s && kvm_enabled() && kvm_irqchip_in_kernel()) { >> + ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24; >> + s->tpr = kvm_apic_get_reg(&kapic, 0x8); >> + s->arb_id = kvm_apic_get_reg(&kapic, 0x9); >> + s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24; >> + s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28; >> + s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf); >> + for (i = 0; i < 8; i++) { >> + s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i); >> + s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i); >> + s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i); >> + } >> + s->esr = kvm_apic_get_reg(&kapic, 0x28); >> + s->icr[0] = kvm_apic_get_reg(&kapic, 0x30); >> + s->icr[1] = kvm_apic_get_reg(&kapic, 0x31); >> + for (i = 0; i < APIC_LVT_NB; i++) { >> + s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i); >> + } >> + s->initial_count = kvm_apic_get_reg(&kapic, 0x38); >> + s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e); >> + >> + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); >> + s->count_shift = (v + 1) & 7; >> + >> + s->initial_count_load_time = qemu_get_clock_ns(vm_clock); >> + apic_next_timer(s, s->initial_count_load_time); >> + } >> + return 0; >> +} >> + >> +static void kvm_apic_set_base(APICState *s, uint64_t val) >> +{ >> + s->apicbase = val; >> +} >> + >> +static void kvm_apic_set_tpr(APICState *s, uint8_t val) >> +{ >> + s->tpr = (val & 0x0f) << 4; >> +} >> + >> +static void do_inject_external_nmi(void *data) >> +{ >> + APICState *s = data; >> + CPUState *env = s->cpu_env; >> + uint32_t lvt; >> + int ret; >> + >> + cpu_synchronize_state(env); >> + >> + lvt = s->lvt[APIC_LVT_LINT1]; >> + if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) { >> + ret = kvm_vcpu_ioctl(env, KVM_NMI); >> + if (ret < 0) { >> + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", >> + strerror(-ret)); >> + } >> + } >> +} >> + >> +static void kvm_apic_external_nmi(APICState *s) >> +{ >> + run_on_cpu(s->cpu_env, do_inject_external_nmi, s); > > Here probably CPUState would make more sense. But here I think it's more consistent to stay with APICState (for the hook) and DeviceState (for apic_deliver_nmi). Jan
On 2011-12-09 08:45, Jan Kiszka wrote: > On 2011-12-08 22:16, Blue Swirl wrote: >> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: >>> This introduces the alternative APIC backend which makes use of KVM's >>> in-kernel device model. External NMI injection via LINT1 is emulated by >>> checking the current state of the in-kernel APIC, only injecting a NMI >>> into the VCPU if LINT1 is unmasked and configured to DM_NMI. >>> >>> MSI is not yet supported, so we disable this when the in-kernel model is >>> in use. >>> >>> CC: Lai Jiangshan <laijs@cn.fujitsu.com> >>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >>> --- >>> Makefile.target | 2 +- >>> hw/kvm/apic.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >>> hw/pc.c | 15 ++++-- >>> kvm.h | 3 + >>> target-i386/kvm.c | 8 +++ >>> 5 files changed, 176 insertions(+), 6 deletions(-) >>> create mode 100644 hw/kvm/apic.c >>> >>> diff --git a/Makefile.target b/Makefile.target >>> index b549988..76de485 100644 >>> --- a/Makefile.target >>> +++ b/Makefile.target >>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o >>> obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o >>> obj-i386-y += debugcon.o multiboot.o >>> obj-i386-y += pc_piix.o >>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o >>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o >>> obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o >>> >>> # shared objects >>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c >>> new file mode 100644 >>> index 0000000..3924f9e >>> --- /dev/null >>> +++ b/hw/kvm/apic.c >>> @@ -0,0 +1,154 @@ >>> +/* >>> + * KVM in-kernel APIC support >>> + * >>> + * Copyright (c) 2011 Siemens AG >>> + * >>> + * Authors: >>> + * Jan Kiszka <jan.kiszka@siemens.com> >>> + * >>> + * This work is licensed under the terms of the GNU GPL version 2. >>> + * See the COPYING file in the top-level directory. >>> + */ >>> +#include "hw/apic_internal.h" >>> +#include "kvm.h" >>> + >>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, >>> + int reg_id, uint32_t val) >>> +{ >>> + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; >>> +} >>> + >>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, >>> + int reg_id) >>> +{ >>> + return *((uint32_t *)(kapic->regs + (reg_id << 4))); >>> +} >>> + >>> +int kvm_put_apic(CPUState *env) >>> +{ >>> + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); >> >> Please pass APICState instead of CPUState. > > DeviceState, I suppose. Yes, makes more sense, update will follow. On second look: no, I'll keep it as is. All kvm_get/put_* helpers have this kind of signature, i.e. are working against env. kvm_get/put_apic just happens to be implemented outside of target-i386/kvm.c. And they require both APIC and CPUState anyway, so it makes no difference. Jan
On Fri, Dec 9, 2011 at 07:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: > On 2011-12-09 08:45, Jan Kiszka wrote: >> On 2011-12-08 22:16, Blue Swirl wrote: >>> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: >>>> This introduces the alternative APIC backend which makes use of KVM's >>>> in-kernel device model. External NMI injection via LINT1 is emulated by >>>> checking the current state of the in-kernel APIC, only injecting a NMI >>>> into the VCPU if LINT1 is unmasked and configured to DM_NMI. >>>> >>>> MSI is not yet supported, so we disable this when the in-kernel model is >>>> in use. >>>> >>>> CC: Lai Jiangshan <laijs@cn.fujitsu.com> >>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >>>> --- >>>> Makefile.target | 2 +- >>>> hw/kvm/apic.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >>>> hw/pc.c | 15 ++++-- >>>> kvm.h | 3 + >>>> target-i386/kvm.c | 8 +++ >>>> 5 files changed, 176 insertions(+), 6 deletions(-) >>>> create mode 100644 hw/kvm/apic.c >>>> >>>> diff --git a/Makefile.target b/Makefile.target >>>> index b549988..76de485 100644 >>>> --- a/Makefile.target >>>> +++ b/Makefile.target >>>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o >>>> obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o >>>> obj-i386-y += debugcon.o multiboot.o >>>> obj-i386-y += pc_piix.o >>>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o >>>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o >>>> obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o >>>> >>>> # shared objects >>>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c >>>> new file mode 100644 >>>> index 0000000..3924f9e >>>> --- /dev/null >>>> +++ b/hw/kvm/apic.c >>>> @@ -0,0 +1,154 @@ >>>> +/* >>>> + * KVM in-kernel APIC support >>>> + * >>>> + * Copyright (c) 2011 Siemens AG >>>> + * >>>> + * Authors: >>>> + * Jan Kiszka <jan.kiszka@siemens.com> >>>> + * >>>> + * This work is licensed under the terms of the GNU GPL version 2. >>>> + * See the COPYING file in the top-level directory. >>>> + */ >>>> +#include "hw/apic_internal.h" >>>> +#include "kvm.h" >>>> + >>>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, >>>> + int reg_id, uint32_t val) >>>> +{ >>>> + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; >>>> +} >>>> + >>>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, >>>> + int reg_id) >>>> +{ >>>> + return *((uint32_t *)(kapic->regs + (reg_id << 4))); >>>> +} >>>> + >>>> +int kvm_put_apic(CPUState *env) >>>> +{ >>>> + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); >>> >>> Please pass APICState instead of CPUState. >> >> DeviceState, I suppose. Yes, makes more sense, update will follow. > > On second look: no, I'll keep it as is. All kvm_get/put_* helpers have > this kind of signature, i.e. are working against env. There's kvm_get_supported_msrs for example. > kvm_get/put_apic > just happens to be implemented outside of target-i386/kvm.c. And they > require both APIC and CPUState anyway, so it makes no difference. It does, passing CPUState violates layering. Please split the functions so that the ioctl calls which need CPUState go to kvm.c. For example, the functions in kvm/apic.c could just perform copying from kvm_lapic_state fields to APICstate fields and vice versa. The KVM interface by the way does not look so clever. Why isn't there just an array of 32 bit fields so the casts can be avoided? Perhaps APICState should be (later) changed to match KVM version so that the structure can be passed directly without copying.
On 2011-12-10 16:40, Blue Swirl wrote: > On Fri, Dec 9, 2011 at 07:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: >> On 2011-12-09 08:45, Jan Kiszka wrote: >>> On 2011-12-08 22:16, Blue Swirl wrote: >>>> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: >>>>> This introduces the alternative APIC backend which makes use of KVM's >>>>> in-kernel device model. External NMI injection via LINT1 is emulated by >>>>> checking the current state of the in-kernel APIC, only injecting a NMI >>>>> into the VCPU if LINT1 is unmasked and configured to DM_NMI. >>>>> >>>>> MSI is not yet supported, so we disable this when the in-kernel model is >>>>> in use. >>>>> >>>>> CC: Lai Jiangshan <laijs@cn.fujitsu.com> >>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >>>>> --- >>>>> Makefile.target | 2 +- >>>>> hw/kvm/apic.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >>>>> hw/pc.c | 15 ++++-- >>>>> kvm.h | 3 + >>>>> target-i386/kvm.c | 8 +++ >>>>> 5 files changed, 176 insertions(+), 6 deletions(-) >>>>> create mode 100644 hw/kvm/apic.c >>>>> >>>>> diff --git a/Makefile.target b/Makefile.target >>>>> index b549988..76de485 100644 >>>>> --- a/Makefile.target >>>>> +++ b/Makefile.target >>>>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o >>>>> obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o >>>>> obj-i386-y += debugcon.o multiboot.o >>>>> obj-i386-y += pc_piix.o >>>>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o >>>>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o >>>>> obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o >>>>> >>>>> # shared objects >>>>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c >>>>> new file mode 100644 >>>>> index 0000000..3924f9e >>>>> --- /dev/null >>>>> +++ b/hw/kvm/apic.c >>>>> @@ -0,0 +1,154 @@ >>>>> +/* >>>>> + * KVM in-kernel APIC support >>>>> + * >>>>> + * Copyright (c) 2011 Siemens AG >>>>> + * >>>>> + * Authors: >>>>> + * Jan Kiszka <jan.kiszka@siemens.com> >>>>> + * >>>>> + * This work is licensed under the terms of the GNU GPL version 2. >>>>> + * See the COPYING file in the top-level directory. >>>>> + */ >>>>> +#include "hw/apic_internal.h" >>>>> +#include "kvm.h" >>>>> + >>>>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, >>>>> + int reg_id, uint32_t val) >>>>> +{ >>>>> + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; >>>>> +} >>>>> + >>>>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, >>>>> + int reg_id) >>>>> +{ >>>>> + return *((uint32_t *)(kapic->regs + (reg_id << 4))); >>>>> +} >>>>> + >>>>> +int kvm_put_apic(CPUState *env) >>>>> +{ >>>>> + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); >>>> >>>> Please pass APICState instead of CPUState. >>> >>> DeviceState, I suppose. Yes, makes more sense, update will follow. >> >> On second look: no, I'll keep it as is. All kvm_get/put_* helpers have >> this kind of signature, i.e. are working against env. > > There's kvm_get_supported_msrs for example. > >> kvm_get/put_apic >> just happens to be implemented outside of target-i386/kvm.c. And they >> require both APIC and CPUState anyway, so it makes no difference. > > It does, passing CPUState violates layering. Please split the > functions so that the ioctl calls which need CPUState go to kvm.c. For > example, the functions in kvm/apic.c could just perform copying from > kvm_lapic_state fields to APICstate fields and vice versa. That's a good idea. > > The KVM interface by the way does not look so clever. Why isn't there > just an array of 32 bit fields so the casts can be avoided? Perhaps > APICState should be (later) changed to match KVM version so that the > structure can be passed directly without copying. Wouldn't that complicate the use in the user space model again? At least for registers that are used with both backends. Jan
On Sat, Dec 10, 2011 at 15:58, Jan Kiszka <jan.kiszka@web.de> wrote: > On 2011-12-10 16:40, Blue Swirl wrote: >> On Fri, Dec 9, 2011 at 07:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: >>> On 2011-12-09 08:45, Jan Kiszka wrote: >>>> On 2011-12-08 22:16, Blue Swirl wrote: >>>>> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka <jan.kiszka@siemens.com> wrote: >>>>>> This introduces the alternative APIC backend which makes use of KVM's >>>>>> in-kernel device model. External NMI injection via LINT1 is emulated by >>>>>> checking the current state of the in-kernel APIC, only injecting a NMI >>>>>> into the VCPU if LINT1 is unmasked and configured to DM_NMI. >>>>>> >>>>>> MSI is not yet supported, so we disable this when the in-kernel model is >>>>>> in use. >>>>>> >>>>>> CC: Lai Jiangshan <laijs@cn.fujitsu.com> >>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >>>>>> --- >>>>>> Makefile.target | 2 +- >>>>>> hw/kvm/apic.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >>>>>> hw/pc.c | 15 ++++-- >>>>>> kvm.h | 3 + >>>>>> target-i386/kvm.c | 8 +++ >>>>>> 5 files changed, 176 insertions(+), 6 deletions(-) >>>>>> create mode 100644 hw/kvm/apic.c >>>>>> >>>>>> diff --git a/Makefile.target b/Makefile.target >>>>>> index b549988..76de485 100644 >>>>>> --- a/Makefile.target >>>>>> +++ b/Makefile.target >>>>>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o >>>>>> obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o >>>>>> obj-i386-y += debugcon.o multiboot.o >>>>>> obj-i386-y += pc_piix.o >>>>>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o >>>>>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o >>>>>> obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o >>>>>> >>>>>> # shared objects >>>>>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c >>>>>> new file mode 100644 >>>>>> index 0000000..3924f9e >>>>>> --- /dev/null >>>>>> +++ b/hw/kvm/apic.c >>>>>> @@ -0,0 +1,154 @@ >>>>>> +/* >>>>>> + * KVM in-kernel APIC support >>>>>> + * >>>>>> + * Copyright (c) 2011 Siemens AG >>>>>> + * >>>>>> + * Authors: >>>>>> + * Jan Kiszka <jan.kiszka@siemens.com> >>>>>> + * >>>>>> + * This work is licensed under the terms of the GNU GPL version 2. >>>>>> + * See the COPYING file in the top-level directory. >>>>>> + */ >>>>>> +#include "hw/apic_internal.h" >>>>>> +#include "kvm.h" >>>>>> + >>>>>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, >>>>>> + int reg_id, uint32_t val) >>>>>> +{ >>>>>> + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; >>>>>> +} >>>>>> + >>>>>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, >>>>>> + int reg_id) >>>>>> +{ >>>>>> + return *((uint32_t *)(kapic->regs + (reg_id << 4))); >>>>>> +} >>>>>> + >>>>>> +int kvm_put_apic(CPUState *env) >>>>>> +{ >>>>>> + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); >>>>> >>>>> Please pass APICState instead of CPUState. >>>> >>>> DeviceState, I suppose. Yes, makes more sense, update will follow. >>> >>> On second look: no, I'll keep it as is. All kvm_get/put_* helpers have >>> this kind of signature, i.e. are working against env. >> >> There's kvm_get_supported_msrs for example. >> >>> kvm_get/put_apic >>> just happens to be implemented outside of target-i386/kvm.c. And they >>> require both APIC and CPUState anyway, so it makes no difference. >> >> It does, passing CPUState violates layering. Please split the >> functions so that the ioctl calls which need CPUState go to kvm.c. For >> example, the functions in kvm/apic.c could just perform copying from >> kvm_lapic_state fields to APICstate fields and vice versa. > > That's a good idea. > >> >> The KVM interface by the way does not look so clever. Why isn't there >> just an array of 32 bit fields so the casts can be avoided? Perhaps >> APICState should be (later) changed to match KVM version so that the >> structure can be passed directly without copying. > > Wouldn't that complicate the use in the user space model again? At least > for registers that are used with both backends. Well, we have (at least) two styles how to model devices. In the first one, the device state structure contains an array of registers, so the functions which use them may need for example to perform some bit field extraction to get what they need. In the model used by APIC and others, the structure contains cooked values, for example divide_count and count_shift in APICState. This means that the CPU accesses get slightly slower since the fields need to be packed and unpacked but the other functions may be faster. Which one is better depends on frequency and importance of register accesses by CPU vs. other accesses. But it shouldn't complicate that much either way. Actually design choices like this may have been taken without too much consideration. Alternatively, KVM interface could be changed to take QEMU structure directly, but I don't suppose that would be a good idea. It would be easier for everyone if QEMU changed instead.
diff --git a/Makefile.target b/Makefile.target index b549988..76de485 100644 --- a/Makefile.target +++ b/Makefile.target @@ -236,7 +236,7 @@ obj-i386-y += vmport.o obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o obj-i386-y += pc_piix.o -obj-i386-$(CONFIG_KVM) += kvm/clock.o +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o # shared objects diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c new file mode 100644 index 0000000..3924f9e --- /dev/null +++ b/hw/kvm/apic.c @@ -0,0 +1,154 @@ +/* + * KVM in-kernel APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ +#include "hw/apic_internal.h" +#include "kvm.h" + +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, + int reg_id, uint32_t val) +{ + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; +} + +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, + int reg_id) +{ + return *((uint32_t *)(kapic->regs + (reg_id << 4))); +} + +int kvm_put_apic(CPUState *env) +{ + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); + struct kvm_lapic_state kapic; + int i; + + if (s && kvm_enabled() && kvm_irqchip_in_kernel()) { + memset(&kapic, 0, sizeof(kapic)); + kvm_apic_set_reg(&kapic, 0x2, s->id << 24); + kvm_apic_set_reg(&kapic, 0x8, s->tpr); + kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24); + kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); + kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec); + for (i = 0; i < 8; i++) { + kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]); + kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]); + kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]); + } + kvm_apic_set_reg(&kapic, 0x28, s->esr); + kvm_apic_set_reg(&kapic, 0x30, s->icr[0]); + kvm_apic_set_reg(&kapic, 0x31, s->icr[1]); + for (i = 0; i < APIC_LVT_NB; i++) { + kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]); + } + kvm_apic_set_reg(&kapic, 0x38, s->initial_count); + kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf); + + return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic); + } + + return 0; +} + +int kvm_get_apic(CPUState *env) +{ + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); + struct kvm_lapic_state kapic; + int ret, i, v; + + if (s && kvm_enabled() && kvm_irqchip_in_kernel()) { + ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic); + if (ret < 0) { + return ret; + } + + s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24; + s->tpr = kvm_apic_get_reg(&kapic, 0x8); + s->arb_id = kvm_apic_get_reg(&kapic, 0x9); + s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24; + s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28; + s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf); + for (i = 0; i < 8; i++) { + s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i); + s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i); + s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i); + } + s->esr = kvm_apic_get_reg(&kapic, 0x28); + s->icr[0] = kvm_apic_get_reg(&kapic, 0x30); + s->icr[1] = kvm_apic_get_reg(&kapic, 0x31); + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i); + } + s->initial_count = kvm_apic_get_reg(&kapic, 0x38); + s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e); + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_get_clock_ns(vm_clock); + apic_next_timer(s, s->initial_count_load_time); + } + return 0; +} + +static void kvm_apic_set_base(APICState *s, uint64_t val) +{ + s->apicbase = val; +} + +static void kvm_apic_set_tpr(APICState *s, uint8_t val) +{ + s->tpr = (val & 0x0f) << 4; +} + +static void do_inject_external_nmi(void *data) +{ + APICState *s = data; + CPUState *env = s->cpu_env; + uint32_t lvt; + int ret; + + cpu_synchronize_state(env); + + lvt = s->lvt[APIC_LVT_LINT1]; + if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) { + ret = kvm_vcpu_ioctl(env, KVM_NMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", + strerror(-ret)); + } + } +} + +static void kvm_apic_external_nmi(APICState *s) +{ + run_on_cpu(s->cpu_env, do_inject_external_nmi, s); +} + +static void kvm_apic_backend_init(APICState *s) +{ + memory_region_init_reservation(&s->io_memory, "kvm-apic-msi", + MSI_SPACE_SIZE); +} + +static APICBackend kvm_apic_backend = { + .name = "KVM", + .init = kvm_apic_backend_init, + .set_base = kvm_apic_set_base, + .set_tpr = kvm_apic_set_tpr, + .external_nmi = kvm_apic_external_nmi, +}; + +static void kvm_apic_register_backend(void) +{ + apic_register_backend(&kvm_apic_backend); +} + +device_init(kvm_apic_register_backend) diff --git a/hw/pc.c b/hw/pc.c index 066edc4..8c8aa49 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -878,27 +878,32 @@ DeviceState *cpu_get_current_apic(void) static DeviceState *apic_init(void *env, uint8_t apic_id) { + const char *backend = "QEMU"; DeviceState *dev; - SysBusDevice *d; static int apic_mapped; dev = qdev_create(NULL, "apic"); qdev_prop_set_uint8(dev, "id", apic_id); qdev_prop_set_ptr(dev, "cpu_env", env); - qdev_prop_set_string(dev, "backend", g_strdup("QEMU")); + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + backend = "KVM"; + } + qdev_prop_set_string(dev, "backend", g_strdup(backend)); qdev_init_nofail(dev); - d = sysbus_from_qdev(dev); /* XXX: mapping more APICs at the same memory location */ if (apic_mapped == 0) { /* NOTE: the APIC is directly connected to the CPU - it is not on the global memory bus. */ /* XXX: what if the base changes? */ - sysbus_mmio_map(d, 0, MSI_ADDR_BASE); + sysbus_mmio_map(sysbus_from_qdev(dev), 0, MSI_ADDR_BASE); apic_mapped = 1; } - msi_supported = true; + /* KVM does not support MSI yet. */ + if (!kvm_enabled() || !kvm_irqchip_in_kernel()) { + msi_supported = true; + } return dev; } diff --git a/kvm.h b/kvm.h index a3c87af..446360a 100644 --- a/kvm.h +++ b/kvm.h @@ -134,6 +134,9 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level); void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); int kvm_irqchip_commit_routes(KVMState *s); +int kvm_put_apic(CPUState *env); +int kvm_get_apic(CPUState *env); + struct kvm_guest_debug; struct kvm_debug_exit_arch; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 9d1191f..956f436 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1450,6 +1450,10 @@ int kvm_arch_put_registers(CPUState *env, int level) if (ret < 0) { return ret; } + ret = kvm_put_apic(env); + if (ret < 0) { + return ret; + } } ret = kvm_put_vcpu_events(env, level); if (ret < 0) { @@ -1497,6 +1501,10 @@ int kvm_arch_get_registers(CPUState *env) if (ret < 0) { return ret; } + ret = kvm_get_apic(env); + if (ret < 0) { + return ret; + } ret = kvm_get_vcpu_events(env); if (ret < 0) { return ret;
This introduces the alternative APIC backend which makes use of KVM's in-kernel device model. External NMI injection via LINT1 is emulated by checking the current state of the in-kernel APIC, only injecting a NMI into the VCPU if LINT1 is unmasked and configured to DM_NMI. MSI is not yet supported, so we disable this when the in-kernel model is in use. CC: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> --- Makefile.target | 2 +- hw/kvm/apic.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/pc.c | 15 ++++-- kvm.h | 3 + target-i386/kvm.c | 8 +++ 5 files changed, 176 insertions(+), 6 deletions(-) create mode 100644 hw/kvm/apic.c