Patchwork [v5,06/16] apic: Introduce backend/frontend infrastructure for KVM reuse

login
register
mail settings
Submitter Jan Kiszka
Date Dec. 15, 2011, 12:33 p.m.
Message ID <c7844d420d29d281ec17416742cb18b464351d6a.1323952403.git.jan.kiszka@siemens.com>
Download mbox | patch
Permalink /patch/131634/
State New
Headers show

Comments

Jan Kiszka - Dec. 15, 2011, 12:33 p.m.
The KVM in-kernel APIC model will reuse parts of the user space model
while providing the same frontend view to guest and most management
interfaces. Introduce an APIC backend concept to encapsulate those
parts that will tell user space and KVM model apart. The backend offers
callback hooks for init, base/tpr setting, and the external NMI delivery
that will be implemented accordingly.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 Makefile.target    |    2 +-
 hw/apic.c          |  285 +++-------------------------------------------------
 hw/apic.h          |    1 -
 hw/apic_common.c   |  265 ++++++++++++++++++++++++++++++++++++++++++++++++
 hw/apic_internal.h |  119 ++++++++++++++++++++++
 hw/pc.c            |    1 +
 6 files changed, 401 insertions(+), 272 deletions(-)
 create mode 100644 hw/apic_common.c
 create mode 100644 hw/apic_internal.h
Anthony Liguori - Dec. 19, 2011, 10:14 p.m.
On 12/15/2011 06:33 AM, Jan Kiszka wrote:
> The KVM in-kernel APIC model will reuse parts of the user space model
> while providing the same frontend view to guest and most management
> interfaces. Introduce an APIC backend concept to encapsulate those
> parts that will tell user space and KVM model apart. The backend offers
> callback hooks for init, base/tpr setting, and the external NMI delivery
> that will be implemented accordingly.
>
> Signed-off-by: Jan Kiszka<jan.kiszka@siemens.com>
> ---
>   Makefile.target    |    2 +-
>   hw/apic.c          |  285 +++-------------------------------------------------
>   hw/apic.h          |    1 -
>   hw/apic_common.c   |  265 ++++++++++++++++++++++++++++++++++++++++++++++++
>   hw/apic_internal.h |  119 ++++++++++++++++++++++
>   hw/pc.c            |    1 +
>   6 files changed, 401 insertions(+), 272 deletions(-)
>   create mode 100644 hw/apic_common.c
>   create mode 100644 hw/apic_internal.h
>
> diff --git a/Makefile.target b/Makefile.target
> index 1d24a30..c46f062 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -231,7 +231,7 @@ obj-$(CONFIG_IVSHMEM) += ivshmem.o
>   # Hardware support
>   obj-i386-y += vga.o
>   obj-i386-y += mc146818rtc.o pc.o
> -obj-i386-y += cirrus_vga.o sga.o apic.o ioapic.o piix_pci.o
> +obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic.o piix_pci.o
>   obj-i386-y += vmport.o
>   obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>   obj-i386-y += debugcon.o multiboot.o
> diff --git a/hw/apic.c b/hw/apic.c
> index bec493b..5fa3111 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -16,53 +16,13 @@
>    * You should have received a copy of the GNU Lesser General Public
>    * License along with this library; if not, see<http://www.gnu.org/licenses/>
>    */
> -#include "hw.h"
> +#include "apic_internal.h"
>   #include "apic.h"
>   #include "ioapic.h"
> -#include "qemu-timer.h"
>   #include "host-utils.h"
> -#include "sysbus.h"
>   #include "trace.h"
>   #include "pc.h"
>
> -/* APIC Local Vector Table */
> -#define APIC_LVT_TIMER   0
> -#define APIC_LVT_THERMAL 1
> -#define APIC_LVT_PERFORM 2
> -#define APIC_LVT_LINT0   3
> -#define APIC_LVT_LINT1   4
> -#define APIC_LVT_ERROR   5
> -#define APIC_LVT_NB      6
> -
> -/* APIC delivery modes */
> -#define APIC_DM_FIXED	0
> -#define APIC_DM_LOWPRI	1
> -#define APIC_DM_SMI	2
> -#define APIC_DM_NMI	4
> -#define APIC_DM_INIT	5
> -#define APIC_DM_SIPI	6
> -#define APIC_DM_EXTINT	7
> -
> -/* APIC destination mode */
> -#define APIC_DESTMODE_FLAT	0xf
> -#define APIC_DESTMODE_CLUSTER	1
> -
> -#define APIC_TRIGGER_EDGE  0
> -#define APIC_TRIGGER_LEVEL 1
> -
> -#define	APIC_LVT_TIMER_PERIODIC		(1<<17)
> -#define	APIC_LVT_MASKED			(1<<16)
> -#define	APIC_LVT_LEVEL_TRIGGER		(1<<15)
> -#define	APIC_LVT_REMOTE_IRR		(1<<14)
> -#define	APIC_INPUT_POLARITY		(1<<13)
> -#define	APIC_SEND_PENDING		(1<<12)
> -
> -#define ESR_ILLEGAL_ADDRESS (1<<  7)
> -
> -#define APIC_SV_DIRECTED_IO             (1<<12)
> -#define APIC_SV_ENABLE                  (1<<8)
> -
> -#define MAX_APICS 255
>   #define MAX_APIC_WORDS 8
>
>   /* Intel APIC constants: from include/asm/msidef.h */
> @@ -75,40 +35,7 @@
>   #define MSI_ADDR_DEST_ID_SHIFT		12
>   #define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
>
> -#define MSI_ADDR_SIZE                   0x100000
> -
> -typedef struct APICState APICState;
> -
> -struct APICState {
> -    SysBusDevice busdev;
> -    MemoryRegion io_memory;
> -    void *cpu_env;
> -    uint32_t apicbase;
> -    uint8_t id;
> -    uint8_t arb_id;
> -    uint8_t tpr;
> -    uint32_t spurious_vec;
> -    uint8_t log_dest;
> -    uint8_t dest_mode;
> -    uint32_t isr[8];  /* in service register */
> -    uint32_t tmr[8];  /* trigger mode register */
> -    uint32_t irr[8]; /* interrupt request register */
> -    uint32_t lvt[APIC_LVT_NB];
> -    uint32_t esr; /* error register */
> -    uint32_t icr[2];
> -
> -    uint32_t divide_conf;
> -    int count_shift;
> -    uint32_t initial_count;
> -    int64_t initial_count_load_time, next_time;
> -    uint32_t idx;
> -    QEMUTimer *timer;
> -    int sipi_vector;
> -    int wait_for_sipi;
> -};
> -
>   static APICState *local_apics[MAX_APICS + 1];
> -static int apic_irq_delivered;
>
>   static void apic_set_irq(APICState *s, int vector_num, int trigger_mode);
>   static void apic_update_irq(APICState *s);
> @@ -205,10 +132,8 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
>       }
>   }
>
> -void apic_deliver_nmi(DeviceState *d)
> +static void apic_external_nmi(APICState *s)
>   {
> -    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> -
>       apic_local_deliver(s, APIC_LVT_LINT1);
>   }
>
> @@ -300,14 +225,8 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
>       apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
>   }
>
> -void cpu_set_apic_base(DeviceState *d, uint64_t val)
> +static void apic_set_base(APICState *s, uint64_t val)
>   {
> -    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> -
> -    trace_cpu_set_apic_base(val);
> -
> -    if (!s)
> -        return;
>       s->apicbase = (val&  0xfffff000) |
>           (s->apicbase&  (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
>       /* if disabled, cannot be enabled again */
> @@ -318,32 +237,12 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val)
>       }
>   }
>
> -uint64_t cpu_get_apic_base(DeviceState *d)
> +static void apic_set_tpr(APICState *s, uint8_t val)
>   {
> -    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> -
> -    trace_cpu_get_apic_base(s ? (uint64_t)s->apicbase: 0);
> -
> -    return s ? s->apicbase : 0;
> -}
> -
> -void cpu_set_apic_tpr(DeviceState *d, uint8_t val)
> -{
> -    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> -
> -    if (!s)
> -        return;
>       s->tpr = (val&  0x0f)<<  4;
>       apic_update_irq(s);
>   }
>
> -uint8_t cpu_get_apic_tpr(DeviceState *d)
> -{
> -    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> -
> -    return s ? s->tpr>>  4 : 0;
> -}
> -
>   /* return -1 if no bit is set */
>   static int get_highest_priority_int(uint32_t *tab)
>   {
> @@ -413,27 +312,6 @@ static void apic_update_irq(APICState *s)
>       }
>   }
>
> -void apic_report_irq_delivered(int delivered)
> -{
> -    apic_irq_delivered += delivered;
> -
> -    trace_apic_report_irq_delivered(apic_irq_delivered);
> -}
> -
> -void apic_reset_irq_delivered(void)
> -{
> -    trace_apic_reset_irq_delivered(apic_irq_delivered);
> -
> -    apic_irq_delivered = 0;
> -}
> -
> -int apic_get_irq_delivered(void)
> -{
> -    trace_apic_get_irq_delivered(apic_irq_delivered);
> -
> -    return apic_irq_delivered;
> -}
> -
>   static void apic_set_irq(APICState *s, int vector_num, int trigger_mode)
>   {
>       apic_report_irq_delivered(!get_bit(s->irr, vector_num));
> @@ -515,35 +393,6 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
>       }
>   }
>
> -void apic_init_reset(DeviceState *d)
> -{
> -    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> -    int i;
> -
> -    if (!s)
> -        return;
> -
> -    s->tpr = 0;
> -    s->spurious_vec = 0xff;
> -    s->log_dest = 0;
> -    s->dest_mode = 0xf;
> -    memset(s->isr, 0, sizeof(s->isr));
> -    memset(s->tmr, 0, sizeof(s->tmr));
> -    memset(s->irr, 0, sizeof(s->irr));
> -    for(i = 0; i<  APIC_LVT_NB; i++)
> -        s->lvt[i] = 1<<  16; /* mask LVT */
> -    s->esr = 0;
> -    memset(s->icr, 0, sizeof(s->icr));
> -    s->divide_conf = 0;
> -    s->count_shift = 0;
> -    s->initial_count = 0;
> -    s->initial_count_load_time = 0;
> -    s->next_time = 0;
> -    s->wait_for_sipi = 1;
> -
> -    qemu_del_timer(s->timer);
> -}
> -
>   static void apic_startup(APICState *s, int vector_num)
>   {
>       s->sipi_vector = vector_num;
> @@ -904,96 +753,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>       }
>   }
>
> -/* This function is only used for old state version 1 and 2 */
> -static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
> -{
> -    APICState *s = opaque;
> -    int i;
> -
> -    if (version_id>  2)
> -        return -EINVAL;
> -
> -    /* XXX: what if the base changes? (registered memory regions) */
> -    qemu_get_be32s(f,&s->apicbase);
> -    qemu_get_8s(f,&s->id);
> -    qemu_get_8s(f,&s->arb_id);
> -    qemu_get_8s(f,&s->tpr);
> -    qemu_get_be32s(f,&s->spurious_vec);
> -    qemu_get_8s(f,&s->log_dest);
> -    qemu_get_8s(f,&s->dest_mode);
> -    for (i = 0; i<  8; i++) {
> -        qemu_get_be32s(f,&s->isr[i]);
> -        qemu_get_be32s(f,&s->tmr[i]);
> -        qemu_get_be32s(f,&s->irr[i]);
> -    }
> -    for (i = 0; i<  APIC_LVT_NB; i++) {
> -        qemu_get_be32s(f,&s->lvt[i]);
> -    }
> -    qemu_get_be32s(f,&s->esr);
> -    qemu_get_be32s(f,&s->icr[0]);
> -    qemu_get_be32s(f,&s->icr[1]);
> -    qemu_get_be32s(f,&s->divide_conf);
> -    s->count_shift=qemu_get_be32(f);
> -    qemu_get_be32s(f,&s->initial_count);
> -    s->initial_count_load_time=qemu_get_be64(f);
> -    s->next_time=qemu_get_be64(f);
> -
> -    if (version_id>= 2)
> -        qemu_get_timer(f, s->timer);
> -    return 0;
> -}
> -
> -static const VMStateDescription vmstate_apic = {
> -    .name = "apic",
> -    .version_id = 3,
> -    .minimum_version_id = 3,
> -    .minimum_version_id_old = 1,
> -    .load_state_old = apic_load_old,
> -    .fields      = (VMStateField []) {
> -        VMSTATE_UINT32(apicbase, APICState),
> -        VMSTATE_UINT8(id, APICState),
> -        VMSTATE_UINT8(arb_id, APICState),
> -        VMSTATE_UINT8(tpr, APICState),
> -        VMSTATE_UINT32(spurious_vec, APICState),
> -        VMSTATE_UINT8(log_dest, APICState),
> -        VMSTATE_UINT8(dest_mode, APICState),
> -        VMSTATE_UINT32_ARRAY(isr, APICState, 8),
> -        VMSTATE_UINT32_ARRAY(tmr, APICState, 8),
> -        VMSTATE_UINT32_ARRAY(irr, APICState, 8),
> -        VMSTATE_UINT32_ARRAY(lvt, APICState, APIC_LVT_NB),
> -        VMSTATE_UINT32(esr, APICState),
> -        VMSTATE_UINT32_ARRAY(icr, APICState, 2),
> -        VMSTATE_UINT32(divide_conf, APICState),
> -        VMSTATE_INT32(count_shift, APICState),
> -        VMSTATE_UINT32(initial_count, APICState),
> -        VMSTATE_INT64(initial_count_load_time, APICState),
> -        VMSTATE_INT64(next_time, APICState),
> -        VMSTATE_TIMER(timer, APICState),
> -        VMSTATE_END_OF_LIST()
> -    }
> -};
> -
> -static void apic_reset(DeviceState *d)
> -{
> -    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> -    int bsp;
> -
> -    bsp = cpu_is_bsp(s->cpu_env);
> -    s->apicbase = 0xfee00000 |
> -        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
> -
> -    apic_init_reset(d);
> -
> -    if (bsp) {
> -        /*
> -         * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization
> -         * time typically by BIOS, so PIC interrupt can be delivered to the
> -         * processor when local APIC is enabled.
> -         */
> -        s->lvt[APIC_LVT_LINT0] = 0x700;
> -    }
> -}
> -
>   static const MemoryRegionOps apic_io_ops = {
>       .old_mmio = {
>           .read = { apic_mem_readb, apic_mem_readw, apic_mem_readl, },
> @@ -1002,41 +761,27 @@ static const MemoryRegionOps apic_io_ops = {
>       .endianness = DEVICE_NATIVE_ENDIAN,
>   };
>
> -static int apic_init1(SysBusDevice *dev)
> +static void apic_backend_init(APICState *s)
>   {
> -    APICState *s = FROM_SYSBUS(APICState, dev);
> -    static int last_apic_idx;
> -
> -    if (last_apic_idx>= MAX_APICS) {
> -        return -1;
> -    }
> -    memory_region_init_io(&s->io_memory,&apic_io_ops, s, "apic",
> -                          MSI_ADDR_SIZE);
> -    sysbus_init_mmio(dev,&s->io_memory);
> +    memory_region_init_io(&s->io_memory,&apic_io_ops, s, "apic-msi",
> +                          MSI_SPACE_SIZE);
>
>       s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s);
> -    s->idx = last_apic_idx++;
>       local_apics[s->idx] = s;
> -    return 0;
>   }
>
> -static SysBusDeviceInfo apic_info = {
> -    .init = apic_init1,
> -    .qdev.name = "apic",
> -    .qdev.size = sizeof(APICState),
> -    .qdev.vmsd =&vmstate_apic,
> -    .qdev.reset = apic_reset,
> -    .qdev.no_user = 1,
> -    .qdev.props = (Property[]) {
> -        DEFINE_PROP_UINT8("id", APICState, id, -1),
> -        DEFINE_PROP_PTR("cpu_env", APICState, cpu_env),
> -        DEFINE_PROP_END_OF_LIST(),
> -    }
> +static APICBackend apic_backend = {
> +    .name = "QEMU",
> +    .init = apic_backend_init,
> +    .set_base = apic_set_base,
> +    .set_tpr = apic_set_tpr,
> +    .external_nmi = apic_external_nmi,
>   };
>
>   static void apic_register_devices(void)
>   {
> -    sysbus_register_withprop(&apic_info);
> +    apic_register_device();
> +    apic_register_backend(&apic_backend);
>   }
>
>   device_init(apic_register_devices)
> diff --git a/hw/apic.h b/hw/apic.h
> index 8173d8a..a62d83b 100644
> --- a/hw/apic.h
> +++ b/hw/apic.h
> @@ -10,7 +10,6 @@ int apic_accept_pic_intr(DeviceState *s);
>   void apic_deliver_pic_intr(DeviceState *s, int level);
>   void apic_deliver_nmi(DeviceState *d);
>   int apic_get_interrupt(DeviceState *s);
> -void apic_report_irq_delivered(int delivered);
>   void apic_reset_irq_delivered(void);
>   int apic_get_irq_delivered(void);
>   void cpu_set_apic_base(DeviceState *s, uint64_t val);
> diff --git a/hw/apic_common.c b/hw/apic_common.c
> new file mode 100644
> index 0000000..4cdc45c
> --- /dev/null
> +++ b/hw/apic_common.c
> @@ -0,0 +1,265 @@
> +/*
> + *  APIC support - common bits of emulated and KVM kernel model
> + *
> + *  Copyright (c) 2004-2005 Fabrice Bellard
> + *  Copyright (c) 2011      Jan Kiszka, Siemens AG
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see<http://www.gnu.org/licenses/>
> + */
> +#include "apic.h"
> +#include "apic_internal.h"
> +#include "trace.h"
> +
> +static QSIMPLEQ_HEAD(, APICBackend) backends =
> +    QSIMPLEQ_HEAD_INITIALIZER(backends);
> +static int apic_irq_delivered;
> +
> +void cpu_set_apic_base(DeviceState *d, uint64_t val)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +
> +    trace_cpu_set_apic_base(val);
> +
> +    if (s) {
> +        s->backend->set_base(s, val);
> +    }
> +}
> +
> +uint64_t cpu_get_apic_base(DeviceState *d)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +
> +    trace_cpu_get_apic_base(s ? (uint64_t)s->apicbase : 0);
> +
> +    return s ? s->apicbase : 0;
> +}
> +
> +void cpu_set_apic_tpr(DeviceState *d, uint8_t val)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +
> +    if (s) {
> +        s->backend->set_tpr(s, val);
> +    }
> +}
> +
> +uint8_t cpu_get_apic_tpr(DeviceState *d)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +
> +    return s ? s->tpr>>  4 : 0;
> +}
> +
> +void apic_report_irq_delivered(int delivered)
> +{
> +    apic_irq_delivered += delivered;
> +
> +    trace_apic_report_irq_delivered(apic_irq_delivered);
> +}
> +
> +void apic_reset_irq_delivered(void)
> +{
> +    trace_apic_reset_irq_delivered(apic_irq_delivered);
> +
> +    apic_irq_delivered = 0;
> +}
> +
> +int apic_get_irq_delivered(void)
> +{
> +    trace_apic_get_irq_delivered(apic_irq_delivered);
> +
> +    return apic_irq_delivered;
> +}
> +
> +void apic_deliver_nmi(DeviceState *d)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +
> +    s->backend->external_nmi(s);
> +}
> +
> +void apic_init_reset(DeviceState *d)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +    int i;
> +
> +    if (!s) {
> +        return;
> +    }
> +    s->tpr = 0;
> +    s->spurious_vec = 0xff;
> +    s->log_dest = 0;
> +    s->dest_mode = 0xf;
> +    memset(s->isr, 0, sizeof(s->isr));
> +    memset(s->tmr, 0, sizeof(s->tmr));
> +    memset(s->irr, 0, sizeof(s->irr));
> +    for (i = 0; i<  APIC_LVT_NB; i++) {
> +        s->lvt[i] = APIC_LVT_MASKED;
> +    }
> +    s->esr = 0;
> +    memset(s->icr, 0, sizeof(s->icr));
> +    s->divide_conf = 0;
> +    s->count_shift = 0;
> +    s->initial_count = 0;
> +    s->initial_count_load_time = 0;
> +    s->next_time = 0;
> +    s->wait_for_sipi = 1;
> +
> +    qemu_del_timer(s->timer);
> +}
> +
> +static void apic_reset(DeviceState *d)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> +    bool bsp;
> +
> +    bsp = cpu_is_bsp(s->cpu_env);
> +    s->apicbase = 0xfee00000 |
> +        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
> +
> +    apic_init_reset(d);
> +
> +    if (bsp) {
> +        /*
> +         * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization
> +         * time typically by BIOS, so PIC interrupt can be delivered to the
> +         * processor when local APIC is enabled.
> +         */
> +        s->lvt[APIC_LVT_LINT0] = 0x700;
> +    }
> +}
> +
> +/* This function is only used for old state version 1 and 2 */
> +static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
> +{
> +    APICState *s = opaque;
> +    int i;
> +
> +    if (version_id>  2) {
> +        return -EINVAL;
> +    }
> +
> +    /* XXX: what if the base changes? (registered memory regions) */
> +    qemu_get_be32s(f,&s->apicbase);
> +    qemu_get_8s(f,&s->id);
> +    qemu_get_8s(f,&s->arb_id);
> +    qemu_get_8s(f,&s->tpr);
> +    qemu_get_be32s(f,&s->spurious_vec);
> +    qemu_get_8s(f,&s->log_dest);
> +    qemu_get_8s(f,&s->dest_mode);
> +    for (i = 0; i<  8; i++) {
> +        qemu_get_be32s(f,&s->isr[i]);
> +        qemu_get_be32s(f,&s->tmr[i]);
> +        qemu_get_be32s(f,&s->irr[i]);
> +    }
> +    for (i = 0; i<  APIC_LVT_NB; i++) {
> +        qemu_get_be32s(f,&s->lvt[i]);
> +    }
> +    qemu_get_be32s(f,&s->esr);
> +    qemu_get_be32s(f,&s->icr[0]);
> +    qemu_get_be32s(f,&s->icr[1]);
> +    qemu_get_be32s(f,&s->divide_conf);
> +    s->count_shift = qemu_get_be32(f);
> +    qemu_get_be32s(f,&s->initial_count);
> +    s->initial_count_load_time = qemu_get_be64(f);
> +    s->next_time = qemu_get_be64(f);
> +
> +    if (version_id>= 2) {
> +        qemu_get_timer(f, s->timer);
> +    }
> +    return 0;
> +}
> +
> +static const VMStateDescription vmstate_apic = {
> +    .name = "apic",
> +    .version_id = 3,
> +    .minimum_version_id = 3,
> +    .minimum_version_id_old = 1,
> +    .load_state_old = apic_load_old,
> +    .fields      = (VMStateField[]) {
> +        VMSTATE_UINT32(apicbase, APICState),
> +        VMSTATE_UINT8(id, APICState),
> +        VMSTATE_UINT8(arb_id, APICState),
> +        VMSTATE_UINT8(tpr, APICState),
> +        VMSTATE_UINT32(spurious_vec, APICState),
> +        VMSTATE_UINT8(log_dest, APICState),
> +        VMSTATE_UINT8(dest_mode, APICState),
> +        VMSTATE_UINT32_ARRAY(isr, APICState, 8),
> +        VMSTATE_UINT32_ARRAY(tmr, APICState, 8),
> +        VMSTATE_UINT32_ARRAY(irr, APICState, 8),
> +        VMSTATE_UINT32_ARRAY(lvt, APICState, APIC_LVT_NB),
> +        VMSTATE_UINT32(esr, APICState),
> +        VMSTATE_UINT32_ARRAY(icr, APICState, 2),
> +        VMSTATE_UINT32(divide_conf, APICState),
> +        VMSTATE_INT32(count_shift, APICState),
> +        VMSTATE_UINT32(initial_count, APICState),
> +        VMSTATE_INT64(initial_count_load_time, APICState),
> +        VMSTATE_INT64(next_time, APICState),
> +        VMSTATE_TIMER(timer, APICState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static int apic_init(SysBusDevice *dev)
> +{
> +    APICState *s = FROM_SYSBUS(APICState, dev);
> +    static int apic_no;
> +    APICBackend *b;
> +
> +    if (apic_no>= MAX_APICS) {
> +        return -1;
> +    }
> +    s->idx = apic_no++;
> +
> +    QSIMPLEQ_FOREACH(b,&backends, entry) {
> +        if (strcmp(b->name, s->backend_name) == 0) {
> +            s->backend = b;
> +            break;
> +        }
> +    }
> +    if (!s->backend) {
> +        hw_error("APIC backend '%s' not found!", s->backend_name);
> +        exit(1);
> +    }
> +
> +    b->init(s);
> +
> +    sysbus_init_mmio(&s->busdev,&s->io_memory);
> +    return 0;
> +}
> +
> +static SysBusDeviceInfo apic_info = {
> +    .init = apic_init,
> +    .qdev.name = "apic",
> +    .qdev.size = sizeof(APICState),
> +    .qdev.vmsd =&vmstate_apic,
> +    .qdev.reset = apic_reset,
> +    .qdev.no_user = 1,
> +    .qdev.props = (Property[]) {
> +        DEFINE_PROP_UINT8("id", APICState, id, -1),
> +        DEFINE_PROP_PTR("cpu_env", APICState, cpu_env),
> +        DEFINE_PROP_STRING("backend", APICState, backend_name),
> +        DEFINE_PROP_END_OF_LIST(),
> +    }
> +};
> +
> +void apic_register_backend(APICBackend *backend)
> +{
> +    QSIMPLEQ_INSERT_TAIL(&backends, backend, entry);
> +}
> +
> +void apic_register_device(void)
> +{
> +    sysbus_register_withprop(&apic_info);
> +}
> diff --git a/hw/apic_internal.h b/hw/apic_internal.h
> new file mode 100644
> index 0000000..6cbd901
> --- /dev/null
> +++ b/hw/apic_internal.h
> @@ -0,0 +1,119 @@
> +/*
> + *  APIC support - internal interfaces
> + *
> + *  Copyright (c) 2004-2005 Fabrice Bellard
> + *  Copyright (c) 2011      Jan Kiszka, Siemens AG
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see<http://www.gnu.org/licenses/>
> + */
> +#ifndef QEMU_APIC_INTERNAL_H
> +#define QEMU_APIC_INTERNAL_H
> +
> +#include "memory.h"
> +#include "sysbus.h"
> +#include "qemu-timer.h"
> +#include "qemu-queue.h"
> +
> +/* APIC Local Vector Table */
> +#define APIC_LVT_TIMER                  0
> +#define APIC_LVT_THERMAL                1
> +#define APIC_LVT_PERFORM                2
> +#define APIC_LVT_LINT0                  3
> +#define APIC_LVT_LINT1                  4
> +#define APIC_LVT_ERROR                  5
> +#define APIC_LVT_NB                     6
> +
> +/* APIC delivery modes */
> +#define APIC_DM_FIXED                   0
> +#define APIC_DM_LOWPRI                  1
> +#define APIC_DM_SMI                     2
> +#define APIC_DM_NMI                     4
> +#define APIC_DM_INIT                    5
> +#define APIC_DM_SIPI                    6
> +#define APIC_DM_EXTINT                  7
> +
> +/* APIC destination mode */
> +#define APIC_DESTMODE_FLAT              0xf
> +#define APIC_DESTMODE_CLUSTER           1
> +
> +#define APIC_TRIGGER_EDGE               0
> +#define APIC_TRIGGER_LEVEL              1
> +
> +#define APIC_LVT_TIMER_PERIODIC         (1<<17)
> +#define APIC_LVT_MASKED                 (1<<16)
> +#define APIC_LVT_LEVEL_TRIGGER          (1<<15)
> +#define APIC_LVT_REMOTE_IRR             (1<<14)
> +#define APIC_INPUT_POLARITY             (1<<13)
> +#define APIC_SEND_PENDING               (1<<12)
> +
> +#define ESR_ILLEGAL_ADDRESS (1<<  7)
> +
> +#define APIC_SV_DIRECTED_IO             (1<<12)
> +#define APIC_SV_ENABLE                  (1<<8)
> +
> +#define MAX_APICS 255
> +
> +#define MSI_SPACE_SIZE                  0x100000
> +
> +typedef struct APICBackend APICBackend;
> +typedef struct APICState APICState;
> +
> +struct APICBackend {
> +    const char *name;
> +    void (*init)(APICState *s);
> +    void (*set_base)(APICState *s, uint64_t val);
> +    void (*set_tpr)(APICState *s, uint8_t val);
> +    void (*external_nmi)(APICState *s);
> +
> +    QSIMPLEQ_ENTRY(APICBackend) entry;
> +};


Wouldn't this be more naturally modeled by making APICBackend be a base class?

In qdev today, this would look like:

struct APICCommon {
    SysBusDevice qdev;
    ...
};

struct APICCommonInfo {
     DeviceInfo qdev;
     void (*init)(APICState *s);
     void (*set_base)(APICState *s, uint64_t val);
     void (*set_tpr)(APICState *s, uint8_t val);
     void (*external_nmi)(APICState *s);
};

Take a look at SCSIDevice for an example of this in practice.  This is nicer 
because as we move save/load into devices methods, it becomes natural to define 
the state and save/load function in the base class.  Provided it only uses base 
class state, it lets save/load be compatible between both in-kernel and in-qemu 
device model.

Regards,

Anthony Liguori

Regards,

Anthony Liguori
Jan Kiszka - Dec. 19, 2011, 11:32 p.m.
[ Please strip your replies a bit. I always worry to miss a comment when
scrolling down dozens of pages. ]

On 2011-12-19 23:14, Anthony Liguori wrote:
>> +
>> +struct APICBackend {
>> +    const char *name;
>> +    void (*init)(APICState *s);
>> +    void (*set_base)(APICState *s, uint64_t val);
>> +    void (*set_tpr)(APICState *s, uint8_t val);
>> +    void (*external_nmi)(APICState *s);
>> +
>> +    QSIMPLEQ_ENTRY(APICBackend) entry;
>> +};
> 
> 
> Wouldn't this be more naturally modeled by making APICBackend be a base
> class?
> 
> In qdev today, this would look like:
> 
> struct APICCommon {
>    SysBusDevice qdev;
>    ...
> };
> 
> struct APICCommonInfo {
>     DeviceInfo qdev;
>     void (*init)(APICState *s);
>     void (*set_base)(APICState *s, uint64_t val);
>     void (*set_tpr)(APICState *s, uint8_t val);
>     void (*external_nmi)(APICState *s);
> };
> 
> Take a look at SCSIDevice for an example of this in practice.  This is
> nicer because as we move save/load into devices methods, it becomes
> natural to define the state and save/load function in the base class. 
> Provided it only uses base class state, it lets save/load be compatible
> between both in-kernel and in-qemu device model.

The difference is (unless I completely miss your point) that a common
SCSI base class is used by different derived classes. Here we have a
common frontend class but different base classes, so to say. And we have
a mechanism to chose where to inherit from on instantiation. Precisely
this allows to keep the compatibility between in-kernel and user space
model in this series.

Jan
Anthony Liguori - Dec. 20, 2011, 12:28 a.m.
On 12/19/2011 05:32 PM, Jan Kiszka wrote:
>> struct APICCommonInfo {
>>      DeviceInfo qdev;
>>      void (*init)(APICState *s);
>>      void (*set_base)(APICState *s, uint64_t val);
>>      void (*set_tpr)(APICState *s, uint8_t val);
>>      void (*external_nmi)(APICState *s);
>> };
>>
>> Take a look at SCSIDevice for an example of this in practice.  This is
>> nicer because as we move save/load into devices methods, it becomes
>> natural to define the state and save/load function in the base class.
>> Provided it only uses base class state, it lets save/load be compatible
>> between both in-kernel and in-qemu device model.
>
> The difference is (unless I completely miss your point) that a common
> SCSI base class is used by different derived classes.

The 'frontend' is the common code and the 'backend' are the bits that are 
different, no?

We ultimately want there to be two devices that share all of the 'frontend' code 
by providing different 'backend' implementations.

So make the 'frontend' a base class that provides a set of abstract virtual 
methods (the set you have as the 'backend' interface).  Each device instance 
then inherits from the base class and provides its own implementation of the 
virtual methods.

> Here we have a
> common frontend class but different base classes, so to say. And we have
> a mechanism to chose where to inherit from on instantiation. Precisely
> this allows to keep the compatibility between in-kernel and user space
> model in this series.

Okay, so I really think this is the problem.  The in-kernel APIC is a separate 
device, no a property of the userspace APIC device.

It should be modeled as two separate devices.

Regards,

Anthony Liguori

>
> Jan
>
Jan Kiszka - Dec. 20, 2011, 12:32 a.m.
On 2011-12-20 01:28, Anthony Liguori wrote:
> On 12/19/2011 05:32 PM, Jan Kiszka wrote:
>>> struct APICCommonInfo {
>>>      DeviceInfo qdev;
>>>      void (*init)(APICState *s);
>>>      void (*set_base)(APICState *s, uint64_t val);
>>>      void (*set_tpr)(APICState *s, uint8_t val);
>>>      void (*external_nmi)(APICState *s);
>>> };
>>>
>>> Take a look at SCSIDevice for an example of this in practice.  This is
>>> nicer because as we move save/load into devices methods, it becomes
>>> natural to define the state and save/load function in the base class.
>>> Provided it only uses base class state, it lets save/load be compatible
>>> between both in-kernel and in-qemu device model.
>>
>> The difference is (unless I completely miss your point) that a common
>> SCSI base class is used by different derived classes.
> 
> The 'frontend' is the common code and the 'backend' are the bits that
> are different, no?
> 
> We ultimately want there to be two devices that share all of the
> 'frontend' code by providing different 'backend' implementations.
> 
> So make the 'frontend' a base class that provides a set of abstract
> virtual methods (the set you have as the 'backend' interface).  Each
> device instance then inherits from the base class and provides its own
> implementation of the virtual methods.
> 
>> Here we have a
>> common frontend class but different base classes, so to say. And we have
>> a mechanism to chose where to inherit from on instantiation. Precisely
>> this allows to keep the compatibility between in-kernel and user space
>> model in this series.
> 
> Okay, so I really think this is the problem.  The in-kernel APIC is a
> separate device, no a property of the userspace APIC device.
> 
> It should be modeled as two separate devices.

That was v1 of my patches. Avi didn't like it, I tried it like this, and
in the end I had to agree. So, no, I don't think we want such a model.

Jan
Anthony Liguori - Dec. 20, 2011, 12:38 a.m.
On 12/19/2011 06:32 PM, Jan Kiszka wrote:
> On 2011-12-20 01:28, Anthony Liguori wrote:
>> On 12/19/2011 05:32 PM, Jan Kiszka wrote:
>>>> struct APICCommonInfo {
>>>>       DeviceInfo qdev;
>>>>       void (*init)(APICState *s);
>>>>       void (*set_base)(APICState *s, uint64_t val);
>>>>       void (*set_tpr)(APICState *s, uint8_t val);
>>>>       void (*external_nmi)(APICState *s);
>>>> };
>>>>
>>>> Take a look at SCSIDevice for an example of this in practice.  This is
>>>> nicer because as we move save/load into devices methods, it becomes
>>>> natural to define the state and save/load function in the base class.
>>>> Provided it only uses base class state, it lets save/load be compatible
>>>> between both in-kernel and in-qemu device model.
>>>
>>> The difference is (unless I completely miss your point) that a common
>>> SCSI base class is used by different derived classes.
>>
>> The 'frontend' is the common code and the 'backend' are the bits that
>> are different, no?
>>
>> We ultimately want there to be two devices that share all of the
>> 'frontend' code by providing different 'backend' implementations.
>>
>> So make the 'frontend' a base class that provides a set of abstract
>> virtual methods (the set you have as the 'backend' interface).  Each
>> device instance then inherits from the base class and provides its own
>> implementation of the virtual methods.
>>
>>> Here we have a
>>> common frontend class but different base classes, so to say. And we have
>>> a mechanism to chose where to inherit from on instantiation. Precisely
>>> this allows to keep the compatibility between in-kernel and user space
>>> model in this series.
>>
>> Okay, so I really think this is the problem.  The in-kernel APIC is a
>> separate device, no a property of the userspace APIC device.
>>
>> It should be modeled as two separate devices.
>
> That was v1 of my patches. Avi didn't like it, I tried it like this, and
> in the end I had to agree. So, no, I don't think we want such a model.

Yes, we do :-)

The in-kernel APIC is a different implementation of the APIC device.  It's not 
an "accelerator" for the userspace APIC.

All that you're doing here is reinventing qdev.  You're defining your own type 
system (APICBackend), creating a new regression system for it, and then defining 
your own factory function for creating it (through a qdev property).

I'm struggling to understand the reason to avoid using the infrastructure we 
already have to do all of this.

Regards,

Anthony Liguori

>
> Jan
>
Avi Kivity - Dec. 20, 2011, 9:56 a.m.
On 12/20/2011 02:38 AM, Anthony Liguori wrote:
>> That was v1 of my patches. Avi didn't like it, I tried it like this, and
>> in the end I had to agree. So, no, I don't think we want such a model.
>
>
> Yes, we do :-)
>
> The in-kernel APIC is a different implementation of the APIC device. 
> It's not an "accelerator" for the userspace APIC.

A different implementation but not a different device.  Device == spec.

>
> All that you're doing here is reinventing qdev.  You're defining your
> own type system (APICBackend), creating a new regression system for
> it, and then defining your own factory function for creating it
> (through a qdev property).
>
> I'm struggling to understand the reason to avoid using the
> infrastructure we already have to do all of this.

Not every table of function pointers has to be done through qdev (not
that I feel strongly about this - only that there is just one APIC device).
Anthony Liguori - Dec. 20, 2011, 1:41 p.m.
On 12/20/2011 03:56 AM, Avi Kivity wrote:
> On 12/20/2011 02:38 AM, Anthony Liguori wrote:
>>> That was v1 of my patches. Avi didn't like it, I tried it like this, and
>>> in the end I had to agree. So, no, I don't think we want such a model.
>>
>>
>> Yes, we do :-)
>>
>> The in-kernel APIC is a different implementation of the APIC device.
>> It's not an "accelerator" for the userspace APIC.
>
> A different implementation but not a different device.  Device == spec.

If it was hardware, it'd be a fully compatible clone.  The way we would model 
this is via inheritance.

Regards,

Anthony Liguori

>
>>
>> All that you're doing here is reinventing qdev.  You're defining your
>> own type system (APICBackend), creating a new regression system for
>> it, and then defining your own factory function for creating it
>> (through a qdev property).
>>
>> I'm struggling to understand the reason to avoid using the
>> infrastructure we already have to do all of this.
>
> Not every table of function pointers has to be done through qdev (not
> that I feel strongly about this - only that there is just one APIC device).
>
Paolo Bonzini - Dec. 20, 2011, 1:51 p.m.
On 12/20/2011 02:41 PM, Anthony Liguori wrote:
> On 12/20/2011 03:56 AM, Avi Kivity wrote:
>> On 12/20/2011 02:38 AM, Anthony Liguori wrote:
>>>> That was v1 of my patches. Avi didn't like it, I tried it like this,
>>>> and
>>>> in the end I had to agree. So, no, I don't think we want such a model.
>>>
>>>
>>> Yes, we do :-)
>>>
>>> The in-kernel APIC is a different implementation of the APIC device.
>>> It's not an "accelerator" for the userspace APIC.
>>
>> A different implementation but not a different device. Device == spec.
>
> If it was hardware, it'd be a fully compatible clone. The way we would
> model this is via inheritance.

I see your fully compatible clone, and I raise my bridge with a 
different implementation underneath.  It's the same old debate on is-a 
vs has-a.

In QOM parlance Jan implemented this:

     abstract class Object
         abstract class Device
             class APIC: { backend: link<APICBackend> }
         abstract class APICBackend
             class QEMU_APICBackend
             class KVM_APICBackend

and you're proposing this:

     abstract class Object
         abstract class Device
             abstract class APIC
                 class QEMU_APIC
                 class KVM_APIC

Both can be right, both can be wrong.

Paolo
Anthony Liguori - Dec. 20, 2011, 1:54 p.m.
On 12/20/2011 07:51 AM, Paolo Bonzini wrote:
> On 12/20/2011 02:41 PM, Anthony Liguori wrote:
>> On 12/20/2011 03:56 AM, Avi Kivity wrote:
>>> On 12/20/2011 02:38 AM, Anthony Liguori wrote:
>>>>> That was v1 of my patches. Avi didn't like it, I tried it like this,
>>>>> and
>>>>> in the end I had to agree. So, no, I don't think we want such a model.
>>>>
>>>>
>>>> Yes, we do :-)
>>>>
>>>> The in-kernel APIC is a different implementation of the APIC device.
>>>> It's not an "accelerator" for the userspace APIC.
>>>
>>> A different implementation but not a different device. Device == spec.
>>
>> If it was hardware, it'd be a fully compatible clone. The way we would
>> model this is via inheritance.
>
> I see your fully compatible clone, and I raise my bridge with a different
> implementation underneath. It's the same old debate on is-a vs has-a.
>
> In QOM parlance Jan implemented this:
>
> abstract class Object
> abstract class Device
> class APIC: { backend: link<APICBackend> }
> abstract class APICBackend
> class QEMU_APICBackend
> class KVM_APICBackend

I don't fundamentally object to modeling it like this provided that it's modeled 
(and visible) through qdev and not done through a one-off infrastructure.

But yes, you are exactly correct in your observation (and that both can be right).

Regards,

Anthony Liguori

>
> and you're proposing this:
>
> abstract class Object
> abstract class Device
> abstract class APIC
> class QEMU_APIC
> class KVM_APIC
>
> Both can be right, both can be wrong.
>
> Paolo
>
>
Paolo Bonzini - Dec. 20, 2011, 1:57 p.m.
On 12/20/2011 02:54 PM, Anthony Liguori wrote:
>> In QOM parlance Jan implemented this:
>>
>> abstract class Object
>>     abstract class Device
>>         class APIC: { backend: link<APICBackend> }
>>     abstract class APICBackend
>>         class QEMU_APICBackend
>>         class KVM_APICBackend
>
> I don't fundamentally object to modeling it like this provided that it's
> modeled (and visible) through qdev and not done through a one-off
> infrastructure.

There is no superclass of DeviceState, hence doing it through qdev would 
mean introducing a new bus type and so on.  This would be a superb 
example of a useless bus that can disappear with QOM, but I don't see 
why we should take the pain to add it in the first place. :)

We sure can revisit this when the subclassing and interface 
infrastructures of QOM are merged.

Paolo
Anthony Liguori - Dec. 20, 2011, 2:07 p.m.
On 12/20/2011 07:57 AM, Paolo Bonzini wrote:
> On 12/20/2011 02:54 PM, Anthony Liguori wrote:
>>> In QOM parlance Jan implemented this:
>>>
>>> abstract class Object
>>> abstract class Device
>>> class APIC: { backend: link<APICBackend> }
>>> abstract class APICBackend
>>> class QEMU_APICBackend
>>> class KVM_APICBackend
>>
>> I don't fundamentally object to modeling it like this provided that it's
>> modeled (and visible) through qdev and not done through a one-off
>> infrastructure.
>
> There is no superclass of DeviceState, hence doing it through qdev would mean
> introducing a new bus type and so on. This would be a superb example of a
> useless bus that can disappear with QOM, but I don't see why we should take the
> pain to add it in the first place. :)

Right, so let's modeled it for now as inheritance which qdev can cope with.

>
> We sure can revisit this when the subclassing and interface infrastructures of
> QOM are merged.

I'll have patches out this week (just trying to write some more test cases). 
The latest series is below if you're interested.  I fear that it won't be until 
mid to late January before this can be merged though as I want to give folks 
like Markus a chance to review it.

https://github.com/aliguori/qemu/tree/qom-upstream.3

Regards,

Anthony Liguori

>
> Paolo
>
Avi Kivity - Dec. 20, 2011, 2:07 p.m.
On 12/20/2011 03:51 PM, Paolo Bonzini wrote:
> On 12/20/2011 02:41 PM, Anthony Liguori wrote:
>> On 12/20/2011 03:56 AM, Avi Kivity wrote:
>>> On 12/20/2011 02:38 AM, Anthony Liguori wrote:
>>>>> That was v1 of my patches. Avi didn't like it, I tried it like this,
>>>>> and
>>>>> in the end I had to agree. So, no, I don't think we want such a
>>>>> model.
>>>>
>>>>
>>>> Yes, we do :-)
>>>>
>>>> The in-kernel APIC is a different implementation of the APIC device.
>>>> It's not an "accelerator" for the userspace APIC.
>>>
>>> A different implementation but not a different device. Device == spec.
>>
>> If it was hardware, it'd be a fully compatible clone. The way we would
>> model this is via inheritance.
>
> I see your fully compatible clone, and I raise my bridge with a
> different implementation underneath.  It's the same old debate on is-a
> vs has-a.
>
> In QOM parlance Jan implemented this:

QOM is the new C++

>
>     abstract class Object
>         abstract class Device
>             class APIC: { backend: link<APICBackend> }
>         abstract class APICBackend
>             class QEMU_APICBackend
>             class KVM_APICBackend
>
> and you're proposing this:
>
>     abstract class Object
>         abstract class Device
>             abstract class APIC
>                 class QEMU_APIC
>                 class KVM_APIC
>
> Both can be right, both can be wrong.

I don't mind either.  What I don't want:

  abstract class Object
     abstract class Device
        class APIC
        class KVMAPIC
Jan Kiszka - Dec. 20, 2011, 5:02 p.m.
On 2011-12-20 15:07, Anthony Liguori wrote:
> On 12/20/2011 07:57 AM, Paolo Bonzini wrote:
>> On 12/20/2011 02:54 PM, Anthony Liguori wrote:
>>>> In QOM parlance Jan implemented this:
>>>>
>>>> abstract class Object
>>>> abstract class Device
>>>> class APIC: { backend: link<APICBackend> }
>>>> abstract class APICBackend
>>>> class QEMU_APICBackend
>>>> class KVM_APICBackend
>>>
>>> I don't fundamentally object to modeling it like this provided that it's
>>> modeled (and visible) through qdev and not done through a one-off
>>> infrastructure.
>>
>> There is no superclass of DeviceState, hence doing it through qdev
>> would mean
>> introducing a new bus type and so on. This would be a superb example of a
>> useless bus that can disappear with QOM, but I don't see why we should
>> take the
>> pain to add it in the first place. :)
> 
> Right, so let's modeled it for now as inheritance which qdev can cope with.

Do we have a clear plan now how to sort out the addressing issues in
this model? I mean when registering two devices under different names
that are supposed to be addressable under the same alias once
instantiated. I didn't follow recent qtree naming changes in details
unfortunately, if they already enable this.

This does not need to be implemented before merge. I just like to have a
common view on how to address it once it matters (for device inspection).

Jan
Anthony Liguori - Dec. 20, 2011, 7:14 p.m.
On 12/20/2011 11:02 AM, Jan Kiszka wrote:
> On 2011-12-20 15:07, Anthony Liguori wrote:
>> On 12/20/2011 07:57 AM, Paolo Bonzini wrote:
>>> On 12/20/2011 02:54 PM, Anthony Liguori wrote:
>>>>> In QOM parlance Jan implemented this:
>>>>>
>>>>> abstract class Object
>>>>> abstract class Device
>>>>> class APIC: { backend: link<APICBackend>  }
>>>>> abstract class APICBackend
>>>>> class QEMU_APICBackend
>>>>> class KVM_APICBackend
>>>>
>>>> I don't fundamentally object to modeling it like this provided that it's
>>>> modeled (and visible) through qdev and not done through a one-off
>>>> infrastructure.
>>>
>>> There is no superclass of DeviceState, hence doing it through qdev
>>> would mean
>>> introducing a new bus type and so on. This would be a superb example of a
>>> useless bus that can disappear with QOM, but I don't see why we should
>>> take the
>>> pain to add it in the first place. :)
>>
>> Right, so let's modeled it for now as inheritance which qdev can cope with.
>
> Do we have a clear plan now how to sort out the addressing issues in
> this model? I mean when registering two devices under different names
> that are supposed to be addressable under the same alias once
> instantiated. I didn't follow recent qtree naming changes in details
> unfortunately, if they already enable this.

I think everyone is in agreement.  We'll start with an APICBase type that's 
modeled in qdev as a base class.

There will be an APICBaseInfo that will replace APICBackend.

There will be two classes that implement APICBaseInfo, KvmAPIC and APIC.  They 
will be separate devices.

APICBase will register the vmsd and will use the name "apic" to register it. 
You can just set the qdev.vmsd field in the apic_qdev_register() function to 
ensure that both use the same implementation.

>
> This does not need to be implemented before merge. I just like to have a
> common view on how to address it once it matters (for device inspection).

You can do this all today without any pending patches.  As I mentioned earlier, 
I don't mind doing this after the fact if you'd just like to get the current 
series merged.

If your series lands before the QOM series I just posted, then I will need to do 
it as part of the QOM series anyway.

Regards,

Anthony Liguori

> Jan
>
Jan Kiszka - Dec. 20, 2011, 9:23 p.m.
On 2011-12-20 20:14, Anthony Liguori wrote:
> On 12/20/2011 11:02 AM, Jan Kiszka wrote:
>> On 2011-12-20 15:07, Anthony Liguori wrote:
>>> On 12/20/2011 07:57 AM, Paolo Bonzini wrote:
>>>> On 12/20/2011 02:54 PM, Anthony Liguori wrote:
>>>>>> In QOM parlance Jan implemented this:
>>>>>>
>>>>>> abstract class Object
>>>>>> abstract class Device
>>>>>> class APIC: { backend: link<APICBackend>  }
>>>>>> abstract class APICBackend
>>>>>> class QEMU_APICBackend
>>>>>> class KVM_APICBackend
>>>>>
>>>>> I don't fundamentally object to modeling it like this provided that
>>>>> it's
>>>>> modeled (and visible) through qdev and not done through a one-off
>>>>> infrastructure.
>>>>
>>>> There is no superclass of DeviceState, hence doing it through qdev
>>>> would mean
>>>> introducing a new bus type and so on. This would be a superb example
>>>> of a
>>>> useless bus that can disappear with QOM, but I don't see why we should
>>>> take the
>>>> pain to add it in the first place. :)
>>>
>>> Right, so let's modeled it for now as inheritance which qdev can cope
>>> with.
>>
>> Do we have a clear plan now how to sort out the addressing issues in
>> this model? I mean when registering two devices under different names
>> that are supposed to be addressable under the same alias once
>> instantiated. I didn't follow recent qtree naming changes in details
>> unfortunately, if they already enable this.
> 
> I think everyone is in agreement.  We'll start with an APICBase type
> that's modeled in qdev as a base class.
> 
> There will be an APICBaseInfo that will replace APICBackend.
> 
> There will be two classes that implement APICBaseInfo, KvmAPIC and
> APIC.  They will be separate devices.
> 
> APICBase will register the vmsd and will use the name "apic" to register
> it. You can just set the qdev.vmsd field in the apic_qdev_register()
> function to ensure that both use the same implementation.

I'm not talking about migration here, I'm talking about qtree
addressability. That is orthogonal, at least right now.

> 
>>
>> This does not need to be implemented before merge. I just like to have a
>> common view on how to address it once it matters (for device inspection).
> 
> You can do this all today without any pending patches.

Nope, don't see how.

There is currently no use case for it (e.g. no device_show -
device_add/del makes no sense for the devices in question), but it
should be addressable in QOM in the future.

Jan
Anthony Liguori - Dec. 20, 2011, 9:38 p.m.
On 12/20/2011 03:23 PM, Jan Kiszka wrote:
> On 2011-12-20 20:14, Anthony Liguori wrote:
>> On 12/20/2011 11:02 AM, Jan Kiszka wrote:
>>> On 2011-12-20 15:07, Anthony Liguori wrote:
>>>> On 12/20/2011 07:57 AM, Paolo Bonzini wrote:
>>>>> On 12/20/2011 02:54 PM, Anthony Liguori wrote:
>>>>>>> In QOM parlance Jan implemented this:
>>>>>>>
>>>>>>> abstract class Object
>>>>>>> abstract class Device
>>>>>>> class APIC: { backend: link<APICBackend>   }
>>>>>>> abstract class APICBackend
>>>>>>> class QEMU_APICBackend
>>>>>>> class KVM_APICBackend
>>>>>>
>>>>>> I don't fundamentally object to modeling it like this provided that
>>>>>> it's
>>>>>> modeled (and visible) through qdev and not done through a one-off
>>>>>> infrastructure.
>>>>>
>>>>> There is no superclass of DeviceState, hence doing it through qdev
>>>>> would mean
>>>>> introducing a new bus type and so on. This would be a superb example
>>>>> of a
>>>>> useless bus that can disappear with QOM, but I don't see why we should
>>>>> take the
>>>>> pain to add it in the first place. :)
>>>>
>>>> Right, so let's modeled it for now as inheritance which qdev can cope
>>>> with.
>>>
>>> Do we have a clear plan now how to sort out the addressing issues in
>>> this model? I mean when registering two devices under different names
>>> that are supposed to be addressable under the same alias once
>>> instantiated. I didn't follow recent qtree naming changes in details
>>> unfortunately, if they already enable this.
>>
>> I think everyone is in agreement.  We'll start with an APICBase type
>> that's modeled in qdev as a base class.
>>
>> There will be an APICBaseInfo that will replace APICBackend.
>>
>> There will be two classes that implement APICBaseInfo, KvmAPIC and
>> APIC.  They will be separate devices.
>>
>> APICBase will register the vmsd and will use the name "apic" to register
>> it. You can just set the qdev.vmsd field in the apic_qdev_register()
>> function to ensure that both use the same implementation.
>
> I'm not talking about migration here, I'm talking about qtree
> addressability. That is orthogonal, at least right now.

qtree is not an ABI.  The output of info qtree can (and will) change over time.

>
>>
>>>
>>> This does not need to be implemented before merge. I just like to have a
>>> common view on how to address it once it matters (for device inspection).
>>
>> You can do this all today without any pending patches.
>
> Nope, don't see how.

What is this issue?

>
> There is currently no use case for it (e.g. no device_show -
> device_add/del makes no sense for the devices in question), but it
> should be addressable in QOM in the future.

I guess I'm a bit confused...

Regards,

Anthony Liguori

>
> Jan
>
Jan Kiszka - Dec. 20, 2011, 9:45 p.m.
On 2011-12-20 22:38, Anthony Liguori wrote:
> On 12/20/2011 03:23 PM, Jan Kiszka wrote:
>> On 2011-12-20 20:14, Anthony Liguori wrote:
>>> On 12/20/2011 11:02 AM, Jan Kiszka wrote:
>>>> On 2011-12-20 15:07, Anthony Liguori wrote:
>>>>> On 12/20/2011 07:57 AM, Paolo Bonzini wrote:
>>>>>> On 12/20/2011 02:54 PM, Anthony Liguori wrote:
>>>>>>>> In QOM parlance Jan implemented this:
>>>>>>>>
>>>>>>>> abstract class Object
>>>>>>>> abstract class Device
>>>>>>>> class APIC: { backend: link<APICBackend>   }
>>>>>>>> abstract class APICBackend
>>>>>>>> class QEMU_APICBackend
>>>>>>>> class KVM_APICBackend
>>>>>>>
>>>>>>> I don't fundamentally object to modeling it like this provided that
>>>>>>> it's
>>>>>>> modeled (and visible) through qdev and not done through a one-off
>>>>>>> infrastructure.
>>>>>>
>>>>>> There is no superclass of DeviceState, hence doing it through qdev
>>>>>> would mean
>>>>>> introducing a new bus type and so on. This would be a superb example
>>>>>> of a
>>>>>> useless bus that can disappear with QOM, but I don't see why we
>>>>>> should
>>>>>> take the
>>>>>> pain to add it in the first place. :)
>>>>>
>>>>> Right, so let's modeled it for now as inheritance which qdev can cope
>>>>> with.
>>>>
>>>> Do we have a clear plan now how to sort out the addressing issues in
>>>> this model? I mean when registering two devices under different names
>>>> that are supposed to be addressable under the same alias once
>>>> instantiated. I didn't follow recent qtree naming changes in details
>>>> unfortunately, if they already enable this.
>>>
>>> I think everyone is in agreement.  We'll start with an APICBase type
>>> that's modeled in qdev as a base class.
>>>
>>> There will be an APICBaseInfo that will replace APICBackend.
>>>
>>> There will be two classes that implement APICBaseInfo, KvmAPIC and
>>> APIC.  They will be separate devices.
>>>
>>> APICBase will register the vmsd and will use the name "apic" to register
>>> it. You can just set the qdev.vmsd field in the apic_qdev_register()
>>> function to ensure that both use the same implementation.
>>
>> I'm not talking about migration here, I'm talking about qtree
>> addressability. That is orthogonal, at least right now.
> 
> qtree is not an ABI.  The output of info qtree can (and will) change
> over time.

That's not the point. The point is that at least some branch of the
qtree should be identically named for both the KVM and the user space
incarnations of a particular device (given a certain qemu version).

The request was that /qtree/path/to/apic should not change if you enable
KVM in-kernel acceleration in the very same qemu release. There can also
be some /qtree/path/to/kvm-apic then, but as alias (or as primary name
and the other becomes an alias). I think this makes sense if the user is
still able to clearly differentiate between both versions when listing
devices.

Jan
Anthony Liguori - Dec. 20, 2011, 9:55 p.m.
On 12/20/2011 03:45 PM, Jan Kiszka wrote:
> On 2011-12-20 22:38, Anthony Liguori wrote:
>>> I'm not talking about migration here, I'm talking about qtree
>>> addressability. That is orthogonal, at least right now.
>>
>> qtree is not an ABI.  The output of info qtree can (and will) change
>> over time.
>
> That's not the point. The point is that at least some branch of the
> qtree should be identically named for both the KVM and the user space
> incarnations of a particular device (given a certain qemu version).

There is no such thing as "qtree paths".  Today, devices have ids or are 
anonymous.  The apic is currently an anonymous device and there's no way to 
address it until we complete the PC composition tree.  I have patches for this, 
but that won't land until after series 4.

Starting right now, we have a standard path mechanism.  This path will either 
follow the composition tree or potentially an arbitrary path through the link graph.

The components of the path are the *property* names of the parent device.  In 
the case of the local APIC, you would have something like:

/cpus/cpu0/apic
/cpus/cpu1/apic

Which would be links on the composition tree.  The name wouldn't change even if 
the type of this object changed.  You'll probably have a flag or something in 
the cpu object that lets you determine whether the child is created as a 
kvm-apic or just a normal apic.  But that would only affect the 'type' flag.

> The request was that /qtree/path/to/apic should not change if you enable
> KVM in-kernel acceleration in the very same qemu release.

The type names of the devices are orthogonal to the path names.

> There can also
> be some /qtree/path/to/kvm-apic then, but as alias (or as primary name
> and the other becomes an alias).   I think this makes sense if the user is
> still able to clearly differentiate between both versions when listing
> devices.

Yes, they just need to read the 'type' property.  The distinguishing property 
would be:

/cpus/cpu0/apic.type = 'apic'

vs.

/cpus/cpu0/apic.type = 'kvm-apic'

But otherwise, it would look the same.

Again, if you implement qdev based inheritance as I described in my previous 
note, this will all Just Work.  We have everything we need in the tree to model 
this.

Regards,

Anthony Liguori

>
> Jan
>
Jan Kiszka - Dec. 20, 2011, 10:20 p.m.
On 2011-12-20 22:55, Anthony Liguori wrote:
> On 12/20/2011 03:45 PM, Jan Kiszka wrote:
>> On 2011-12-20 22:38, Anthony Liguori wrote:
>>>> I'm not talking about migration here, I'm talking about qtree
>>>> addressability. That is orthogonal, at least right now.
>>>
>>> qtree is not an ABI.  The output of info qtree can (and will) change
>>> over time.
>>
>> That's not the point. The point is that at least some branch of the
>> qtree should be identically named for both the KVM and the user space
>> incarnations of a particular device (given a certain qemu version).
> 
> There is no such thing as "qtree paths".  Today, devices have ids or are
> anonymous.  The apic is currently an anonymous device and there's no way
> to address it until we complete the PC composition tree.  I have patches
> for this, but that won't land until after series 4.
> 
> Starting right now, we have a standard path mechanism.  This path will
> either follow the composition tree or potentially an arbitrary path
> through the link graph.
> 
> The components of the path are the *property* names of the parent
> device.  In the case of the local APIC, you would have something like:
> 
> /cpus/cpu0/apic
> /cpus/cpu1/apic
> 
> Which would be links on the composition tree.  The name wouldn't change
> even if the type of this object changed. 

Perfect! That was what I forgot about and what makes it possible to
return to the original two-device model.

> You'll probably have a flag or
> something in the cpu object that lets you determine whether the child is
> created as a kvm-apic or just a normal apic. 

I rather hope you will be able to ask the device for its type instead
replicating that information.

Jan
Anthony Liguori - Dec. 20, 2011, 11:41 p.m.
On 12/20/2011 04:20 PM, Jan Kiszka wrote:
> On 2011-12-20 22:55, Anthony Liguori wrote:
>> The components of the path are the *property* names of the parent
>> device.  In the case of the local APIC, you would have something like:
>>
>> /cpus/cpu0/apic
>> /cpus/cpu1/apic
>>
>> Which would be links on the composition tree.  The name wouldn't change
>> even if the type of this object changed.
>
> Perfect! That was what I forgot about and what makes it possible to
> return to the original two-device model.
>
>> You'll probably have a flag or
>> something in the cpu object that lets you determine whether the child is
>> created as a kvm-apic or just a normal apic.
>
> I rather hope you will be able to ask the device for its type instead
> replicating that information.

Yes, but that's not what I was getting at.

I think you are currently planning on enabling/disabling the in-kernel apic 
through a machine option?

Where I'd like to get to is that the CPUs are modeled as devices and whether the 
APIC is in-kernel or not is a property of the CPU (just like any other CPU flag).

For something like the i8254, since that's a child of the PIIX3, it would be a 
property of the PIIX3 which it would use to create the appropriate i8254 type.

You could also have the CPU and/or i8254 have a link<> which would allow a user 
to explicitly instantiate the appropriate device but I think that makes it 
harder to use than it should be.

By making it a property of the composition parent, you let the parent make the 
best choice to start with and then a user has the ability to override it if it 
sees fit to.

Regards,

Anthony Liguori

> Jan
>
Jan Kiszka - Dec. 20, 2011, 11:45 p.m.
On 2011-12-21 00:41, Anthony Liguori wrote:
> On 12/20/2011 04:20 PM, Jan Kiszka wrote:
>> On 2011-12-20 22:55, Anthony Liguori wrote:
>>> The components of the path are the *property* names of the parent
>>> device.  In the case of the local APIC, you would have something like:
>>>
>>> /cpus/cpu0/apic
>>> /cpus/cpu1/apic
>>>
>>> Which would be links on the composition tree.  The name wouldn't change
>>> even if the type of this object changed.
>>
>> Perfect! That was what I forgot about and what makes it possible to
>> return to the original two-device model.
>>
>>> You'll probably have a flag or
>>> something in the cpu object that lets you determine whether the child is
>>> created as a kvm-apic or just a normal apic.
>>
>> I rather hope you will be able to ask the device for its type instead
>> replicating that information.
> 
> Yes, but that's not what I was getting at.
> 
> I think you are currently planning on enabling/disabling the in-kernel
> apic through a machine option?

Yes, because it is a VM-wide flag, nothing you can control per irqchip,
per chipset or whatever. It must be consistent for the whole VM, means
all CPUs, the chipset, the IOAPIC (which may or may not (PIIX3) be part
of it) etc. It also affects KVM internals that are not directly bound to
device models.

Jan

Patch

diff --git a/Makefile.target b/Makefile.target
index 1d24a30..c46f062 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -231,7 +231,7 @@  obj-$(CONFIG_IVSHMEM) += ivshmem.o
 # Hardware support
 obj-i386-y += vga.o
 obj-i386-y += mc146818rtc.o pc.o
-obj-i386-y += cirrus_vga.o sga.o apic.o ioapic.o piix_pci.o
+obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic.o piix_pci.o
 obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
diff --git a/hw/apic.c b/hw/apic.c
index bec493b..5fa3111 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -16,53 +16,13 @@ 
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>
  */
-#include "hw.h"
+#include "apic_internal.h"
 #include "apic.h"
 #include "ioapic.h"
-#include "qemu-timer.h"
 #include "host-utils.h"
-#include "sysbus.h"
 #include "trace.h"
 #include "pc.h"
 
-/* APIC Local Vector Table */
-#define APIC_LVT_TIMER   0
-#define APIC_LVT_THERMAL 1
-#define APIC_LVT_PERFORM 2
-#define APIC_LVT_LINT0   3
-#define APIC_LVT_LINT1   4
-#define APIC_LVT_ERROR   5
-#define APIC_LVT_NB      6
-
-/* APIC delivery modes */
-#define APIC_DM_FIXED	0
-#define APIC_DM_LOWPRI	1
-#define APIC_DM_SMI	2
-#define APIC_DM_NMI	4
-#define APIC_DM_INIT	5
-#define APIC_DM_SIPI	6
-#define APIC_DM_EXTINT	7
-
-/* APIC destination mode */
-#define APIC_DESTMODE_FLAT	0xf
-#define APIC_DESTMODE_CLUSTER	1
-
-#define APIC_TRIGGER_EDGE  0
-#define APIC_TRIGGER_LEVEL 1
-
-#define	APIC_LVT_TIMER_PERIODIC		(1<<17)
-#define	APIC_LVT_MASKED			(1<<16)
-#define	APIC_LVT_LEVEL_TRIGGER		(1<<15)
-#define	APIC_LVT_REMOTE_IRR		(1<<14)
-#define	APIC_INPUT_POLARITY		(1<<13)
-#define	APIC_SEND_PENDING		(1<<12)
-
-#define ESR_ILLEGAL_ADDRESS (1 << 7)
-
-#define APIC_SV_DIRECTED_IO             (1<<12)
-#define APIC_SV_ENABLE                  (1<<8)
-
-#define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
 /* Intel APIC constants: from include/asm/msidef.h */
@@ -75,40 +35,7 @@ 
 #define MSI_ADDR_DEST_ID_SHIFT		12
 #define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
 
-#define MSI_ADDR_SIZE                   0x100000
-
-typedef struct APICState APICState;
-
-struct APICState {
-    SysBusDevice busdev;
-    MemoryRegion io_memory;
-    void *cpu_env;
-    uint32_t apicbase;
-    uint8_t id;
-    uint8_t arb_id;
-    uint8_t tpr;
-    uint32_t spurious_vec;
-    uint8_t log_dest;
-    uint8_t dest_mode;
-    uint32_t isr[8];  /* in service register */
-    uint32_t tmr[8];  /* trigger mode register */
-    uint32_t irr[8]; /* interrupt request register */
-    uint32_t lvt[APIC_LVT_NB];
-    uint32_t esr; /* error register */
-    uint32_t icr[2];
-
-    uint32_t divide_conf;
-    int count_shift;
-    uint32_t initial_count;
-    int64_t initial_count_load_time, next_time;
-    uint32_t idx;
-    QEMUTimer *timer;
-    int sipi_vector;
-    int wait_for_sipi;
-};
-
 static APICState *local_apics[MAX_APICS + 1];
-static int apic_irq_delivered;
 
 static void apic_set_irq(APICState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICState *s);
@@ -205,10 +132,8 @@  void apic_deliver_pic_intr(DeviceState *d, int level)
     }
 }
 
-void apic_deliver_nmi(DeviceState *d)
+static void apic_external_nmi(APICState *s)
 {
-    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-
     apic_local_deliver(s, APIC_LVT_LINT1);
 }
 
@@ -300,14 +225,8 @@  void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
     apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
-void cpu_set_apic_base(DeviceState *d, uint64_t val)
+static void apic_set_base(APICState *s, uint64_t val)
 {
-    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-
-    trace_cpu_set_apic_base(val);
-
-    if (!s)
-        return;
     s->apicbase = (val & 0xfffff000) |
         (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
     /* if disabled, cannot be enabled again */
@@ -318,32 +237,12 @@  void cpu_set_apic_base(DeviceState *d, uint64_t val)
     }
 }
 
-uint64_t cpu_get_apic_base(DeviceState *d)
+static void apic_set_tpr(APICState *s, uint8_t val)
 {
-    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-
-    trace_cpu_get_apic_base(s ? (uint64_t)s->apicbase: 0);
-
-    return s ? s->apicbase : 0;
-}
-
-void cpu_set_apic_tpr(DeviceState *d, uint8_t val)
-{
-    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-
-    if (!s)
-        return;
     s->tpr = (val & 0x0f) << 4;
     apic_update_irq(s);
 }
 
-uint8_t cpu_get_apic_tpr(DeviceState *d)
-{
-    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-
-    return s ? s->tpr >> 4 : 0;
-}
-
 /* return -1 if no bit is set */
 static int get_highest_priority_int(uint32_t *tab)
 {
@@ -413,27 +312,6 @@  static void apic_update_irq(APICState *s)
     }
 }
 
-void apic_report_irq_delivered(int delivered)
-{
-    apic_irq_delivered += delivered;
-
-    trace_apic_report_irq_delivered(apic_irq_delivered);
-}
-
-void apic_reset_irq_delivered(void)
-{
-    trace_apic_reset_irq_delivered(apic_irq_delivered);
-
-    apic_irq_delivered = 0;
-}
-
-int apic_get_irq_delivered(void)
-{
-    trace_apic_get_irq_delivered(apic_irq_delivered);
-
-    return apic_irq_delivered;
-}
-
 static void apic_set_irq(APICState *s, int vector_num, int trigger_mode)
 {
     apic_report_irq_delivered(!get_bit(s->irr, vector_num));
@@ -515,35 +393,6 @@  static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
     }
 }
 
-void apic_init_reset(DeviceState *d)
-{
-    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-    int i;
-
-    if (!s)
-        return;
-
-    s->tpr = 0;
-    s->spurious_vec = 0xff;
-    s->log_dest = 0;
-    s->dest_mode = 0xf;
-    memset(s->isr, 0, sizeof(s->isr));
-    memset(s->tmr, 0, sizeof(s->tmr));
-    memset(s->irr, 0, sizeof(s->irr));
-    for(i = 0; i < APIC_LVT_NB; i++)
-        s->lvt[i] = 1 << 16; /* mask LVT */
-    s->esr = 0;
-    memset(s->icr, 0, sizeof(s->icr));
-    s->divide_conf = 0;
-    s->count_shift = 0;
-    s->initial_count = 0;
-    s->initial_count_load_time = 0;
-    s->next_time = 0;
-    s->wait_for_sipi = 1;
-
-    qemu_del_timer(s->timer);
-}
-
 static void apic_startup(APICState *s, int vector_num)
 {
     s->sipi_vector = vector_num;
@@ -904,96 +753,6 @@  static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     }
 }
 
-/* This function is only used for old state version 1 and 2 */
-static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
-{
-    APICState *s = opaque;
-    int i;
-
-    if (version_id > 2)
-        return -EINVAL;
-
-    /* XXX: what if the base changes? (registered memory regions) */
-    qemu_get_be32s(f, &s->apicbase);
-    qemu_get_8s(f, &s->id);
-    qemu_get_8s(f, &s->arb_id);
-    qemu_get_8s(f, &s->tpr);
-    qemu_get_be32s(f, &s->spurious_vec);
-    qemu_get_8s(f, &s->log_dest);
-    qemu_get_8s(f, &s->dest_mode);
-    for (i = 0; i < 8; i++) {
-        qemu_get_be32s(f, &s->isr[i]);
-        qemu_get_be32s(f, &s->tmr[i]);
-        qemu_get_be32s(f, &s->irr[i]);
-    }
-    for (i = 0; i < APIC_LVT_NB; i++) {
-        qemu_get_be32s(f, &s->lvt[i]);
-    }
-    qemu_get_be32s(f, &s->esr);
-    qemu_get_be32s(f, &s->icr[0]);
-    qemu_get_be32s(f, &s->icr[1]);
-    qemu_get_be32s(f, &s->divide_conf);
-    s->count_shift=qemu_get_be32(f);
-    qemu_get_be32s(f, &s->initial_count);
-    s->initial_count_load_time=qemu_get_be64(f);
-    s->next_time=qemu_get_be64(f);
-
-    if (version_id >= 2)
-        qemu_get_timer(f, s->timer);
-    return 0;
-}
-
-static const VMStateDescription vmstate_apic = {
-    .name = "apic",
-    .version_id = 3,
-    .minimum_version_id = 3,
-    .minimum_version_id_old = 1,
-    .load_state_old = apic_load_old,
-    .fields      = (VMStateField []) {
-        VMSTATE_UINT32(apicbase, APICState),
-        VMSTATE_UINT8(id, APICState),
-        VMSTATE_UINT8(arb_id, APICState),
-        VMSTATE_UINT8(tpr, APICState),
-        VMSTATE_UINT32(spurious_vec, APICState),
-        VMSTATE_UINT8(log_dest, APICState),
-        VMSTATE_UINT8(dest_mode, APICState),
-        VMSTATE_UINT32_ARRAY(isr, APICState, 8),
-        VMSTATE_UINT32_ARRAY(tmr, APICState, 8),
-        VMSTATE_UINT32_ARRAY(irr, APICState, 8),
-        VMSTATE_UINT32_ARRAY(lvt, APICState, APIC_LVT_NB),
-        VMSTATE_UINT32(esr, APICState),
-        VMSTATE_UINT32_ARRAY(icr, APICState, 2),
-        VMSTATE_UINT32(divide_conf, APICState),
-        VMSTATE_INT32(count_shift, APICState),
-        VMSTATE_UINT32(initial_count, APICState),
-        VMSTATE_INT64(initial_count_load_time, APICState),
-        VMSTATE_INT64(next_time, APICState),
-        VMSTATE_TIMER(timer, APICState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-static void apic_reset(DeviceState *d)
-{
-    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-    int bsp;
-
-    bsp = cpu_is_bsp(s->cpu_env);
-    s->apicbase = 0xfee00000 |
-        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
-
-    apic_init_reset(d);
-
-    if (bsp) {
-        /*
-         * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization
-         * time typically by BIOS, so PIC interrupt can be delivered to the
-         * processor when local APIC is enabled.
-         */
-        s->lvt[APIC_LVT_LINT0] = 0x700;
-    }
-}
-
 static const MemoryRegionOps apic_io_ops = {
     .old_mmio = {
         .read = { apic_mem_readb, apic_mem_readw, apic_mem_readl, },
@@ -1002,41 +761,27 @@  static const MemoryRegionOps apic_io_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int apic_init1(SysBusDevice *dev)
+static void apic_backend_init(APICState *s)
 {
-    APICState *s = FROM_SYSBUS(APICState, dev);
-    static int last_apic_idx;
-
-    if (last_apic_idx >= MAX_APICS) {
-        return -1;
-    }
-    memory_region_init_io(&s->io_memory, &apic_io_ops, s, "apic",
-                          MSI_ADDR_SIZE);
-    sysbus_init_mmio(dev, &s->io_memory);
+    memory_region_init_io(&s->io_memory, &apic_io_ops, s, "apic-msi",
+                          MSI_SPACE_SIZE);
 
     s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s);
-    s->idx = last_apic_idx++;
     local_apics[s->idx] = s;
-    return 0;
 }
 
-static SysBusDeviceInfo apic_info = {
-    .init = apic_init1,
-    .qdev.name = "apic",
-    .qdev.size = sizeof(APICState),
-    .qdev.vmsd = &vmstate_apic,
-    .qdev.reset = apic_reset,
-    .qdev.no_user = 1,
-    .qdev.props = (Property[]) {
-        DEFINE_PROP_UINT8("id", APICState, id, -1),
-        DEFINE_PROP_PTR("cpu_env", APICState, cpu_env),
-        DEFINE_PROP_END_OF_LIST(),
-    }
+static APICBackend apic_backend = {
+    .name = "QEMU",
+    .init = apic_backend_init,
+    .set_base = apic_set_base,
+    .set_tpr = apic_set_tpr,
+    .external_nmi = apic_external_nmi,
 };
 
 static void apic_register_devices(void)
 {
-    sysbus_register_withprop(&apic_info);
+    apic_register_device();
+    apic_register_backend(&apic_backend);
 }
 
 device_init(apic_register_devices)
diff --git a/hw/apic.h b/hw/apic.h
index 8173d8a..a62d83b 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -10,7 +10,6 @@  int apic_accept_pic_intr(DeviceState *s);
 void apic_deliver_pic_intr(DeviceState *s, int level);
 void apic_deliver_nmi(DeviceState *d);
 int apic_get_interrupt(DeviceState *s);
-void apic_report_irq_delivered(int delivered);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);
 void cpu_set_apic_base(DeviceState *s, uint64_t val);
diff --git a/hw/apic_common.c b/hw/apic_common.c
new file mode 100644
index 0000000..4cdc45c
--- /dev/null
+++ b/hw/apic_common.c
@@ -0,0 +1,265 @@ 
+/*
+ *  APIC support - common bits of emulated and KVM kernel model
+ *
+ *  Copyright (c) 2004-2005 Fabrice Bellard
+ *  Copyright (c) 2011      Jan Kiszka, Siemens AG
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+#include "apic.h"
+#include "apic_internal.h"
+#include "trace.h"
+
+static QSIMPLEQ_HEAD(, APICBackend) backends =
+    QSIMPLEQ_HEAD_INITIALIZER(backends);
+static int apic_irq_delivered;
+
+void cpu_set_apic_base(DeviceState *d, uint64_t val)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+
+    trace_cpu_set_apic_base(val);
+
+    if (s) {
+        s->backend->set_base(s, val);
+    }
+}
+
+uint64_t cpu_get_apic_base(DeviceState *d)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+
+    trace_cpu_get_apic_base(s ? (uint64_t)s->apicbase : 0);
+
+    return s ? s->apicbase : 0;
+}
+
+void cpu_set_apic_tpr(DeviceState *d, uint8_t val)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+
+    if (s) {
+        s->backend->set_tpr(s, val);
+    }
+}
+
+uint8_t cpu_get_apic_tpr(DeviceState *d)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+
+    return s ? s->tpr >> 4 : 0;
+}
+
+void apic_report_irq_delivered(int delivered)
+{
+    apic_irq_delivered += delivered;
+
+    trace_apic_report_irq_delivered(apic_irq_delivered);
+}
+
+void apic_reset_irq_delivered(void)
+{
+    trace_apic_reset_irq_delivered(apic_irq_delivered);
+
+    apic_irq_delivered = 0;
+}
+
+int apic_get_irq_delivered(void)
+{
+    trace_apic_get_irq_delivered(apic_irq_delivered);
+
+    return apic_irq_delivered;
+}
+
+void apic_deliver_nmi(DeviceState *d)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+
+    s->backend->external_nmi(s);
+}
+
+void apic_init_reset(DeviceState *d)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+    int i;
+
+    if (!s) {
+        return;
+    }
+    s->tpr = 0;
+    s->spurious_vec = 0xff;
+    s->log_dest = 0;
+    s->dest_mode = 0xf;
+    memset(s->isr, 0, sizeof(s->isr));
+    memset(s->tmr, 0, sizeof(s->tmr));
+    memset(s->irr, 0, sizeof(s->irr));
+    for (i = 0; i < APIC_LVT_NB; i++) {
+        s->lvt[i] = APIC_LVT_MASKED;
+    }
+    s->esr = 0;
+    memset(s->icr, 0, sizeof(s->icr));
+    s->divide_conf = 0;
+    s->count_shift = 0;
+    s->initial_count = 0;
+    s->initial_count_load_time = 0;
+    s->next_time = 0;
+    s->wait_for_sipi = 1;
+
+    qemu_del_timer(s->timer);
+}
+
+static void apic_reset(DeviceState *d)
+{
+    APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+    bool bsp;
+
+    bsp = cpu_is_bsp(s->cpu_env);
+    s->apicbase = 0xfee00000 |
+        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
+
+    apic_init_reset(d);
+
+    if (bsp) {
+        /*
+         * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization
+         * time typically by BIOS, so PIC interrupt can be delivered to the
+         * processor when local APIC is enabled.
+         */
+        s->lvt[APIC_LVT_LINT0] = 0x700;
+    }
+}
+
+/* This function is only used for old state version 1 and 2 */
+static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
+{
+    APICState *s = opaque;
+    int i;
+
+    if (version_id > 2) {
+        return -EINVAL;
+    }
+
+    /* XXX: what if the base changes? (registered memory regions) */
+    qemu_get_be32s(f, &s->apicbase);
+    qemu_get_8s(f, &s->id);
+    qemu_get_8s(f, &s->arb_id);
+    qemu_get_8s(f, &s->tpr);
+    qemu_get_be32s(f, &s->spurious_vec);
+    qemu_get_8s(f, &s->log_dest);
+    qemu_get_8s(f, &s->dest_mode);
+    for (i = 0; i < 8; i++) {
+        qemu_get_be32s(f, &s->isr[i]);
+        qemu_get_be32s(f, &s->tmr[i]);
+        qemu_get_be32s(f, &s->irr[i]);
+    }
+    for (i = 0; i < APIC_LVT_NB; i++) {
+        qemu_get_be32s(f, &s->lvt[i]);
+    }
+    qemu_get_be32s(f, &s->esr);
+    qemu_get_be32s(f, &s->icr[0]);
+    qemu_get_be32s(f, &s->icr[1]);
+    qemu_get_be32s(f, &s->divide_conf);
+    s->count_shift = qemu_get_be32(f);
+    qemu_get_be32s(f, &s->initial_count);
+    s->initial_count_load_time = qemu_get_be64(f);
+    s->next_time = qemu_get_be64(f);
+
+    if (version_id >= 2) {
+        qemu_get_timer(f, s->timer);
+    }
+    return 0;
+}
+
+static const VMStateDescription vmstate_apic = {
+    .name = "apic",
+    .version_id = 3,
+    .minimum_version_id = 3,
+    .minimum_version_id_old = 1,
+    .load_state_old = apic_load_old,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT32(apicbase, APICState),
+        VMSTATE_UINT8(id, APICState),
+        VMSTATE_UINT8(arb_id, APICState),
+        VMSTATE_UINT8(tpr, APICState),
+        VMSTATE_UINT32(spurious_vec, APICState),
+        VMSTATE_UINT8(log_dest, APICState),
+        VMSTATE_UINT8(dest_mode, APICState),
+        VMSTATE_UINT32_ARRAY(isr, APICState, 8),
+        VMSTATE_UINT32_ARRAY(tmr, APICState, 8),
+        VMSTATE_UINT32_ARRAY(irr, APICState, 8),
+        VMSTATE_UINT32_ARRAY(lvt, APICState, APIC_LVT_NB),
+        VMSTATE_UINT32(esr, APICState),
+        VMSTATE_UINT32_ARRAY(icr, APICState, 2),
+        VMSTATE_UINT32(divide_conf, APICState),
+        VMSTATE_INT32(count_shift, APICState),
+        VMSTATE_UINT32(initial_count, APICState),
+        VMSTATE_INT64(initial_count_load_time, APICState),
+        VMSTATE_INT64(next_time, APICState),
+        VMSTATE_TIMER(timer, APICState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static int apic_init(SysBusDevice *dev)
+{
+    APICState *s = FROM_SYSBUS(APICState, dev);
+    static int apic_no;
+    APICBackend *b;
+
+    if (apic_no >= MAX_APICS) {
+        return -1;
+    }
+    s->idx = apic_no++;
+
+    QSIMPLEQ_FOREACH(b, &backends, entry) {
+        if (strcmp(b->name, s->backend_name) == 0) {
+            s->backend = b;
+            break;
+        }
+    }
+    if (!s->backend) {
+        hw_error("APIC backend '%s' not found!", s->backend_name);
+        exit(1);
+    }
+
+    b->init(s);
+
+    sysbus_init_mmio(&s->busdev, &s->io_memory);
+    return 0;
+}
+
+static SysBusDeviceInfo apic_info = {
+    .init = apic_init,
+    .qdev.name = "apic",
+    .qdev.size = sizeof(APICState),
+    .qdev.vmsd = &vmstate_apic,
+    .qdev.reset = apic_reset,
+    .qdev.no_user = 1,
+    .qdev.props = (Property[]) {
+        DEFINE_PROP_UINT8("id", APICState, id, -1),
+        DEFINE_PROP_PTR("cpu_env", APICState, cpu_env),
+        DEFINE_PROP_STRING("backend", APICState, backend_name),
+        DEFINE_PROP_END_OF_LIST(),
+    }
+};
+
+void apic_register_backend(APICBackend *backend)
+{
+    QSIMPLEQ_INSERT_TAIL(&backends, backend, entry);
+}
+
+void apic_register_device(void)
+{
+    sysbus_register_withprop(&apic_info);
+}
diff --git a/hw/apic_internal.h b/hw/apic_internal.h
new file mode 100644
index 0000000..6cbd901
--- /dev/null
+++ b/hw/apic_internal.h
@@ -0,0 +1,119 @@ 
+/*
+ *  APIC support - internal interfaces
+ *
+ *  Copyright (c) 2004-2005 Fabrice Bellard
+ *  Copyright (c) 2011      Jan Kiszka, Siemens AG
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+#ifndef QEMU_APIC_INTERNAL_H
+#define QEMU_APIC_INTERNAL_H
+
+#include "memory.h"
+#include "sysbus.h"
+#include "qemu-timer.h"
+#include "qemu-queue.h"
+
+/* APIC Local Vector Table */
+#define APIC_LVT_TIMER                  0
+#define APIC_LVT_THERMAL                1
+#define APIC_LVT_PERFORM                2
+#define APIC_LVT_LINT0                  3
+#define APIC_LVT_LINT1                  4
+#define APIC_LVT_ERROR                  5
+#define APIC_LVT_NB                     6
+
+/* APIC delivery modes */
+#define APIC_DM_FIXED                   0
+#define APIC_DM_LOWPRI                  1
+#define APIC_DM_SMI                     2
+#define APIC_DM_NMI                     4
+#define APIC_DM_INIT                    5
+#define APIC_DM_SIPI                    6
+#define APIC_DM_EXTINT                  7
+
+/* APIC destination mode */
+#define APIC_DESTMODE_FLAT              0xf
+#define APIC_DESTMODE_CLUSTER           1
+
+#define APIC_TRIGGER_EDGE               0
+#define APIC_TRIGGER_LEVEL              1
+
+#define APIC_LVT_TIMER_PERIODIC         (1<<17)
+#define APIC_LVT_MASKED                 (1<<16)
+#define APIC_LVT_LEVEL_TRIGGER          (1<<15)
+#define APIC_LVT_REMOTE_IRR             (1<<14)
+#define APIC_INPUT_POLARITY             (1<<13)
+#define APIC_SEND_PENDING               (1<<12)
+
+#define ESR_ILLEGAL_ADDRESS (1 << 7)
+
+#define APIC_SV_DIRECTED_IO             (1<<12)
+#define APIC_SV_ENABLE                  (1<<8)
+
+#define MAX_APICS 255
+
+#define MSI_SPACE_SIZE                  0x100000
+
+typedef struct APICBackend APICBackend;
+typedef struct APICState APICState;
+
+struct APICBackend {
+    const char *name;
+    void (*init)(APICState *s);
+    void (*set_base)(APICState *s, uint64_t val);
+    void (*set_tpr)(APICState *s, uint8_t val);
+    void (*external_nmi)(APICState *s);
+
+    QSIMPLEQ_ENTRY(APICBackend) entry;
+};
+
+struct APICState {
+    SysBusDevice busdev;
+    MemoryRegion io_memory;
+    void *cpu_env;
+    uint32_t apicbase;
+    uint8_t id;
+    uint8_t arb_id;
+    uint8_t tpr;
+    uint32_t spurious_vec;
+    uint8_t log_dest;
+    uint8_t dest_mode;
+    uint32_t isr[8];  /* in service register */
+    uint32_t tmr[8];  /* trigger mode register */
+    uint32_t irr[8]; /* interrupt request register */
+    uint32_t lvt[APIC_LVT_NB];
+    uint32_t esr; /* error register */
+    uint32_t icr[2];
+
+    uint32_t divide_conf;
+    int count_shift;
+    uint32_t initial_count;
+    int64_t initial_count_load_time;
+    int64_t next_time;
+    int idx;
+    QEMUTimer *timer;
+    int sipi_vector;
+    int wait_for_sipi;
+
+    char *backend_name;
+    APICBackend *backend;
+};
+
+void apic_register_device(void);
+void apic_register_backend(APICBackend *backend);
+
+void apic_report_irq_delivered(int delivered);
+
+#endif /* !QEMU_APIC_INTERNAL_H */
diff --git a/hw/pc.c b/hw/pc.c
index 240aaae..ee6e59b 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -884,6 +884,7 @@  static DeviceState *apic_init(void *env, uint8_t apic_id)
     dev = qdev_create(NULL, "apic");
     qdev_prop_set_uint8(dev, "id", apic_id);
     qdev_prop_set_ptr(dev, "cpu_env", env);
+    qdev_prop_set_string(dev, "backend", g_strdup("QEMU"));
     qdev_init_nofail(dev);
     d = sysbus_from_qdev(dev);