Patchwork [2/2] LAPIC: make lapic support cpu hotplug

login
register
mail settings
Submitter pingfank@linux.vnet.com
Date Oct. 4, 2011, 11:13 a.m.
Message ID <1317726818-8514-4-git-send-email-pingfank@linux.vnet.com>
Download mbox | patch
Permalink /patch/117602/
State New
Headers show

Comments

pingfank@linux.vnet.com - Oct. 4, 2011, 11:13 a.m.
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Separate apic from qbus to icc bus which supports hotplug feature.
And make the creation of apic as part of cpu initialization, so
apic's state has been ready, before setting kvm_apic.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 Makefile.target      |    1 +
 hw/apic.c            |    7 ++++-
 hw/apic.h            |    1 +
 hw/icc_bus.c         |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/icc_bus.h         |   24 +++++++++++++++++++
 hw/pc.c              |   11 ++++-----
 target-i386/cpu.h    |    1 +
 target-i386/helper.c |    7 +++++-
 target-i386/kvm.c    |    1 -
 9 files changed, 105 insertions(+), 10 deletions(-)
 create mode 100644 hw/icc_bus.c
 create mode 100644 hw/icc_bus.h
Anthony Liguori - Oct. 4, 2011, 12:58 p.m.
On 10/04/2011 06:13 AM, pingfank@linux.vnet.com wrote:
> From: Liu Ping Fan<pingfank@linux.vnet.ibm.com>
>
> Separate apic from qbus to icc bus which supports hotplug feature.
> And make the creation of apic as part of cpu initialization, so
> apic's state has been ready, before setting kvm_apic.
>
> Signed-off-by: Liu Ping Fan<pingfank@linux.vnet.ibm.com>
> ---
>   Makefile.target      |    1 +
>   hw/apic.c            |    7 ++++-
>   hw/apic.h            |    1 +
>   hw/icc_bus.c         |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++
>   hw/icc_bus.h         |   24 +++++++++++++++++++
>   hw/pc.c              |   11 ++++-----
>   target-i386/cpu.h    |    1 +
>   target-i386/helper.c |    7 +++++-
>   target-i386/kvm.c    |    1 -
>   9 files changed, 105 insertions(+), 10 deletions(-)
>   create mode 100644 hw/icc_bus.c
>   create mode 100644 hw/icc_bus.h
>
> diff --git a/Makefile.target b/Makefile.target
> index 9011f28..5607c6d 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -241,6 +241,7 @@ obj-i386-$(CONFIG_KVM) += kvmclock.o
>   obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>   obj-i386-y += testdev.o
>   obj-i386-y += acpi.o acpi_piix4.o
> +obj-i386-y += icc_bus.o
>
>   obj-i386-y += pcspk.o i8254.o
>   obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
> diff --git a/hw/apic.c b/hw/apic.c
> index 69d6ac5..95a1664 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -24,6 +24,7 @@
>   #include "sysbus.h"
>   #include "trace.h"
>   #include "kvm.h"
> +#include "icc_bus.h"
>
>   /* APIC Local Vector Table */
>   #define APIC_LVT_TIMER   0
> @@ -304,11 +305,13 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val)
>
>       if (!s)
>           return;
> +
>       if (kvm_enabled()&&  kvm_irqchip_in_kernel())
>           s->apicbase = val;
>       else
>           s->apicbase = (val&  0xfffff000) |
>               (s->apicbase&  (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
> +
>       /* if disabled, cannot be enabled again */
>       if (!(val&  MSR_IA32_APICBASE_ENABLE)) {
>           s->apicbase&= ~MSR_IA32_APICBASE_ENABLE;
> @@ -1075,7 +1078,7 @@ static const VMStateDescription vmstate_apic = {
>       }
>   };

Please don't introduce extra whitespace in unrelated code.

>
> -static void apic_reset(DeviceState *d)
> +void apic_reset(DeviceState *d)
>   {
>       APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
>       int bsp;
> @@ -1138,7 +1141,7 @@ static SysBusDeviceInfo apic_info = {
>
>   static void apic_register_devices(void)
>   {
> -    sysbus_register_withprop(&apic_info);
> +    iccbus_register(&apic_info);
>   }
>
>   device_init(apic_register_devices)
> diff --git a/hw/apic.h b/hw/apic.h
> index c857d52..e258efa 100644
> --- a/hw/apic.h
> +++ b/hw/apic.h
> @@ -24,5 +24,6 @@ void apic_sipi(DeviceState *s);
>   /* pc.c */
>   int cpu_is_bsp(CPUState *env);
>   DeviceState *cpu_get_current_apic(void);
> +void apic_reset(DeviceState *d);
>
>   #endif
> diff --git a/hw/icc_bus.c b/hw/icc_bus.c
> new file mode 100644
> index 0000000..360ca2a
> --- /dev/null
> +++ b/hw/icc_bus.c
> @@ -0,0 +1,62 @@
> +/*
> +*/

New files need to include copyright/licenses.

> +#define ICC_BUS_PLUG
> +#ifdef ICC_BUS_PLUG

Drop these guards here and at the end of the file.  We conditionally build files 
by having an:

obj-$(CONFIG_ICC_BUS) += icc_bus.o

In the Makefile.

> +#include "icc_bus.h"

> +
> +
> +
> +struct icc_bus_info icc_info = {
> +    .qinfo.name = "icc",
> +    .qinfo.size = sizeof(struct icc_bus),
> +    .qinfo.props = (Property[]) {
> +        DEFINE_PROP_END_OF_LIST(),
> +    }
> +
> +};

Structure name is not following Coding Style.

> +
> +static const VMStateDescription vmstate_icc_bus = {
> +    .name = "icc_bus",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .pre_save = NULL,
> +    .post_load = NULL,
> +};
> +
> +struct icc_bus *g_iccbus;
> +
> +struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
> +{
> +    struct icc_bus *bus;
> +
> +    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent, name));
> +    bus->qbus.allow_hotplug = 1; /* Yes, we can */
> +    bus->qbus.name = "icc";
> +    vmstate_register(NULL, -1,&vmstate_icc_bus, bus);
> +    g_iccbus = bus;
> +    return bus;
> +}
> +
> +
> +
> +
> +
> +static int iccbus_device_init(DeviceState *dev, DeviceInfo *base)
> +{
> +    SysBusDeviceInfo *info = container_of(base, SysBusDeviceInfo, qdev);
> +
> +    return info->init(sysbus_from_qdev(dev));
> +}
> +
> +void iccbus_register(SysBusDeviceInfo *info)
> +{
> +    info->qdev.init = iccbus_device_init;
> +    info->qdev.bus_info =&icc_info.qinfo;
> +
> +    assert(info->qdev.size>= sizeof(SysBusDevice));
> +    qdev_register(&info->qdev);
> +}
> +

It's not obvious to me why you need the g_iccbus variable.

> +#endif
> diff --git a/hw/icc_bus.h b/hw/icc_bus.h
> new file mode 100644
> index 0000000..94d9242
> --- /dev/null
> +++ b/hw/icc_bus.h
> @@ -0,0 +1,24 @@
> +#ifndef QEMU_ICC_H
> +#define QEMU_ICC_H


Needs copyright and a blurb explaining what this file is and what the ICC bus is.

Same comments re: whitespace below.

Regards,

Anthony Liguori

> +#include "qdev.h"
> +#include "sysbus.h"
> +
> +typedef struct icc_bus icc_bus;
> +typedef struct icc_bus_info icc_bus_info;
> +
> +
> +struct icc_bus {
> +    BusState qbus;
> +};
> +
> +struct icc_bus_info {
> +    BusInfo qinfo;
> +};
> +
> +extern struct icc_bus_info icc_info;
> +extern struct icc_bus *g_iccbus;
> +extern struct icc_bus *icc_init_bus(DeviceState *parent, const char *name);
> +extern void iccbus_register(SysBusDeviceInfo *info);

Functions don't need an 'extern' modifier.  I don't think icc_info needs to be 
exported either.

Instead of exporting g_iccbus, it would be better to have a get_icc_bus() method 
that returned the global iccbus.  That way we can more easily hook accesses to 
the bus.

> +#endif
> diff --git a/hw/pc.c b/hw/pc.c
> index 6b3662e..10371d8 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -24,6 +24,7 @@
>   #include "hw.h"
>   #include "pc.h"
>   #include "apic.h"
> +#include "icc_bus.h"
>   #include "fdc.h"
>   #include "ide.h"
>   #include "pci.h"
> @@ -91,6 +92,7 @@ struct e820_table {
>   static struct e820_table e820_table;
>   struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
>
> +
>   void isa_irq_handler(void *opaque, int n, int level)
>   {
>       IsaIrqState *isa = (IsaIrqState *)opaque;
> @@ -872,13 +874,13 @@ DeviceState *cpu_get_current_apic(void)
>       }
>   }
>
> -static DeviceState *apic_init(void *env, uint8_t apic_id)
> +DeviceState *apic_init(void *env, uint8_t apic_id)
>   {
>       DeviceState *dev;
>       SysBusDevice *d;
>       static int apic_mapped;
>
> -    dev = qdev_create(NULL, "apic");
> +    dev = qdev_create(&g_iccbus->qbus, "apic");
>       qdev_prop_set_uint8(dev, "id", apic_id);
>       qdev_prop_set_ptr(dev, "cpu_env", env);
>       qdev_init_nofail(dev);
> @@ -943,10 +945,6 @@ CPUState *pc_new_cpu(const char *cpu_model)
>           fprintf(stderr, "Unable to find x86 CPU definition\n");
>           exit(1);
>       }
> -    if ((env->cpuid_features&  CPUID_APIC) || smp_cpus>  1) {
> -        env->cpuid_apic_id = env->cpu_index;
> -        env->apic_state = apic_init(env, env->cpuid_apic_id);
> -    }
>       qemu_register_reset(pc_cpu_reset, env);
>       pc_cpu_reset(env);
>       return env;
> @@ -956,6 +954,7 @@ void pc_cpus_init(const char *cpu_model)
>   {
>       int i;
>
> +    icc_init_bus(NULL, "icc");
>       /* init CPUs */
>       for(i = 0; i<  smp_cpus; i++) {
>           pc_new_cpu(cpu_model);
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 2a071f2..0160c55 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -1062,4 +1062,5 @@ void svm_check_intercept(CPUState *env1, uint32_t type);
>
>   uint32_t cpu_cc_compute_all(CPUState *env1, int op);
>
> +extern DeviceState *apic_init(void *env, uint8_t apic_id);
>   #endif /* CPU_I386_H */
> diff --git a/target-i386/helper.c b/target-i386/helper.c
> index 5df40d4..551a8a2 100644
> --- a/target-i386/helper.c
> +++ b/target-i386/helper.c
> @@ -30,6 +30,7 @@
>   #include "monitor.h"
>   #endif
>
> +
>   //#define DEBUG_MMU
>
>   /* NOTE: must be called outside the CPU execute loop */
> @@ -1257,7 +1258,11 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
>           return NULL;
>       }
>       mce_init(env);
> -
> +    if ((env->cpuid_features&  CPUID_APIC)
> +        || smp_cpus>  1) {
> +        env->cpuid_apic_id = env->cpu_index;
> +        env->apic_state = apic_init(env, env->cpuid_apic_id);
> +    }
>       qemu_init_vcpu(env);
>
>       return env;
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 571d792..407dba6 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -880,7 +880,6 @@ static int kvm_put_sregs(CPUState *env)
>
>       sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
>       sregs.apic_base = cpu_get_apic_base(env->apic_state);
> -
>       sregs.efer = env->efer;
>
>       return kvm_vcpu_ioctl(env, KVM_SET_SREGS,&sregs);
Jan Kiszka - Oct. 4, 2011, 4:43 p.m.
On 2011-10-04 13:13, pingfank@linux.vnet.com wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> Separate apic from qbus to icc bus which supports hotplug feature.

Modeling the ICC bus looks like a step in the right direction. The
IOAPIC could be attached to it as well to get rid of "ioapics[MAX_IOAPICS]".

> And make the creation of apic as part of cpu initialization, so
> apic's state has been ready, before setting kvm_apic.

There is no kvm-apic upstream yet, so it's hard to judge why we need
this here. If we do, this has to be a separate patch. But I seriously
doubt we need it (my hack worked without it, and that was not because of
its hack nature).

> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  Makefile.target      |    1 +
>  hw/apic.c            |    7 ++++-
>  hw/apic.h            |    1 +
>  hw/icc_bus.c         |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/icc_bus.h         |   24 +++++++++++++++++++
>  hw/pc.c              |   11 ++++-----
>  target-i386/cpu.h    |    1 +
>  target-i386/helper.c |    7 +++++-
>  target-i386/kvm.c    |    1 -
>  9 files changed, 105 insertions(+), 10 deletions(-)
>  create mode 100644 hw/icc_bus.c
>  create mode 100644 hw/icc_bus.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index 9011f28..5607c6d 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -241,6 +241,7 @@ obj-i386-$(CONFIG_KVM) += kvmclock.o
>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>  obj-i386-y += testdev.o
>  obj-i386-y += acpi.o acpi_piix4.o
> +obj-i386-y += icc_bus.o
>  
>  obj-i386-y += pcspk.o i8254.o
>  obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
> diff --git a/hw/apic.c b/hw/apic.c
> index 69d6ac5..95a1664 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -24,6 +24,7 @@
>  #include "sysbus.h"
>  #include "trace.h"
>  #include "kvm.h"
> +#include "icc_bus.h"
>  
>  /* APIC Local Vector Table */
>  #define APIC_LVT_TIMER   0
> @@ -304,11 +305,13 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val)
>  
>      if (!s)
>          return;
> +
>      if (kvm_enabled() && kvm_irqchip_in_kernel())
>          s->apicbase = val;
>      else
>          s->apicbase = (val & 0xfffff000) |
>              (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
> +
>      /* if disabled, cannot be enabled again */
>      if (!(val & MSR_IA32_APICBASE_ENABLE)) {
>          s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
> @@ -1075,7 +1078,7 @@ static const VMStateDescription vmstate_apic = {
>      }
>  };
>  
> -static void apic_reset(DeviceState *d)
> +void apic_reset(DeviceState *d)

Still unused outside of this file, so keep it private.

>  {
>      APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
>      int bsp;
> @@ -1138,7 +1141,7 @@ static SysBusDeviceInfo apic_info = {
>  
>  static void apic_register_devices(void)
>  {
> -    sysbus_register_withprop(&apic_info);
> +    iccbus_register(&apic_info);
>  }
>  
>  device_init(apic_register_devices)
> diff --git a/hw/apic.h b/hw/apic.h
> index c857d52..e258efa 100644
> --- a/hw/apic.h
> +++ b/hw/apic.h
> @@ -24,5 +24,6 @@ void apic_sipi(DeviceState *s);
>  /* pc.c */
>  int cpu_is_bsp(CPUState *env);
>  DeviceState *cpu_get_current_apic(void);
> +void apic_reset(DeviceState *d);
>  
>  #endif
> diff --git a/hw/icc_bus.c b/hw/icc_bus.c
> new file mode 100644
> index 0000000..360ca2a
> --- /dev/null
> +++ b/hw/icc_bus.c
> @@ -0,0 +1,62 @@
> +/*
> +*/
> +#define ICC_BUS_PLUG
> +#ifdef ICC_BUS_PLUG
> +#include "icc_bus.h"
> +
> +
> +
> +struct icc_bus_info icc_info = {
> +    .qinfo.name = "icc",
> +    .qinfo.size = sizeof(struct icc_bus),
> +    .qinfo.props = (Property[]) {
> +        DEFINE_PROP_END_OF_LIST(),
> +    }
> +
> +};
> +
> +
> +static const VMStateDescription vmstate_icc_bus = {
> +    .name = "icc_bus",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .pre_save = NULL,
> +    .post_load = NULL,
> +};
> +
> +struct icc_bus *g_iccbus;
> +
> +struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
> +{
> +    struct icc_bus *bus;
> +
> +    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent, name));
> +    bus->qbus.allow_hotplug = 1; /* Yes, we can */
> +    bus->qbus.name = "icc";
> +    vmstate_register(NULL, -1, &vmstate_icc_bus, bus);

The chipset is the owner of this bus and instantiates it. So it also
provides a vmstate. You can drop this unneeded one here (it's created
via an obsolete API anyway).

> +    g_iccbus = bus;
> +    return bus;
> +}
> +
> +
> +
> +
> +
> +static int iccbus_device_init(DeviceState *dev, DeviceInfo *base)
> +{
> +    SysBusDeviceInfo *info = container_of(base, SysBusDeviceInfo, qdev);
> +
> +    return info->init(sysbus_from_qdev(dev));
> +}
> +
> +void iccbus_register(SysBusDeviceInfo *info)
> +{
> +    info->qdev.init = iccbus_device_init;
> +    info->qdev.bus_info = &icc_info.qinfo;
> +
> +    assert(info->qdev.size >= sizeof(SysBusDevice));
> +    qdev_register(&info->qdev);
> +}

This service should be unneeded when the bus is properly embedded into
its parent (ie. the chipset).

> +
> +#endif
> diff --git a/hw/icc_bus.h b/hw/icc_bus.h
> new file mode 100644
> index 0000000..94d9242
> --- /dev/null
> +++ b/hw/icc_bus.h
> @@ -0,0 +1,24 @@
> +#ifndef QEMU_ICC_H
> +#define QEMU_ICC_H
> +
> +#include "qdev.h"
> +#include "sysbus.h"
> +
> +typedef struct icc_bus icc_bus;
> +typedef struct icc_bus_info icc_bus_info;
> +
> +
> +struct icc_bus {
> +    BusState qbus;
> +};
> +
> +struct icc_bus_info {
> +    BusInfo qinfo;
> +};
> +
> +extern struct icc_bus_info icc_info;
> +extern struct icc_bus *g_iccbus;
> +extern struct icc_bus *icc_init_bus(DeviceState *parent, const char *name);
> +extern void iccbus_register(SysBusDeviceInfo *info);
> +
> +#endif
> diff --git a/hw/pc.c b/hw/pc.c
> index 6b3662e..10371d8 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -24,6 +24,7 @@
>  #include "hw.h"
>  #include "pc.h"
>  #include "apic.h"
> +#include "icc_bus.h"
>  #include "fdc.h"
>  #include "ide.h"
>  #include "pci.h"
> @@ -91,6 +92,7 @@ struct e820_table {
>  static struct e820_table e820_table;
>  struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
>  
> +
>  void isa_irq_handler(void *opaque, int n, int level)
>  {
>      IsaIrqState *isa = (IsaIrqState *)opaque;
> @@ -872,13 +874,13 @@ DeviceState *cpu_get_current_apic(void)
>      }
>  }
>  
> -static DeviceState *apic_init(void *env, uint8_t apic_id)
> +DeviceState *apic_init(void *env, uint8_t apic_id)
>  {
>      DeviceState *dev;
>      SysBusDevice *d;
>      static int apic_mapped;
>  
> -    dev = qdev_create(NULL, "apic");
> +    dev = qdev_create(&g_iccbus->qbus, "apic");
>      qdev_prop_set_uint8(dev, "id", apic_id);
>      qdev_prop_set_ptr(dev, "cpu_env", env);
>      qdev_init_nofail(dev);
> @@ -943,10 +945,6 @@ CPUState *pc_new_cpu(const char *cpu_model)
>          fprintf(stderr, "Unable to find x86 CPU definition\n");
>          exit(1);
>      }
> -    if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
> -        env->cpuid_apic_id = env->cpu_index;
> -        env->apic_state = apic_init(env, env->cpuid_apic_id);
> -    }
>      qemu_register_reset(pc_cpu_reset, env);
>      pc_cpu_reset(env);
>      return env;
> @@ -956,6 +954,7 @@ void pc_cpus_init(const char *cpu_model)
>  {
>      int i;
>  
> +    icc_init_bus(NULL, "icc");
>      /* init CPUs */
>      for(i = 0; i < smp_cpus; i++) {
>          pc_new_cpu(cpu_model);
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 2a071f2..0160c55 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -1062,4 +1062,5 @@ void svm_check_intercept(CPUState *env1, uint32_t type);
>  
>  uint32_t cpu_cc_compute_all(CPUState *env1, int op);
>  
> +extern DeviceState *apic_init(void *env, uint8_t apic_id);
>  #endif /* CPU_I386_H */
> diff --git a/target-i386/helper.c b/target-i386/helper.c
> index 5df40d4..551a8a2 100644
> --- a/target-i386/helper.c
> +++ b/target-i386/helper.c
> @@ -30,6 +30,7 @@
>  #include "monitor.h"
>  #endif
>  
> +
>  //#define DEBUG_MMU
>  
>  /* NOTE: must be called outside the CPU execute loop */
> @@ -1257,7 +1258,11 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
>          return NULL;
>      }
>      mce_init(env);
> -
> +    if ((env->cpuid_features & CPUID_APIC)
> +        || smp_cpus > 1) {
> +        env->cpuid_apic_id = env->cpu_index;
> +        env->apic_state = apic_init(env, env->cpuid_apic_id);
> +    }
>      qemu_init_vcpu(env);
>  
>      return env;
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 571d792..407dba6 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -880,7 +880,6 @@ static int kvm_put_sregs(CPUState *env)
>  
>      sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
>      sregs.apic_base = cpu_get_apic_base(env->apic_state);
> -
>      sregs.efer = env->efer;
>  
>      return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);

Would be good to see the full series, i.e. everything that is required
to make CPU hotplug work. First for QEMU upstream without in-kernel
irqchips and then the qemu-kvm bits.

Jan
pingfan liu - Oct. 5, 2011, 10:09 a.m.
On Tue, Oct 4, 2011 at 8:58 PM, Anthony Liguori <anthony@codemonkey.ws>wrote:

> On 10/04/2011 06:13 AM, pingfank@linux.vnet.com wrote:
>
>> From: Liu Ping Fan<pingfank@linux.vnet.ibm.**com<pingfank@linux.vnet.ibm.com>
>> >
>>
>> Separate apic from qbus to icc bus which supports hotplug feature.
>> And make the creation of apic as part of cpu initialization, so
>> apic's state has been ready, before setting kvm_apic.
>>
>> Signed-off-by: Liu Ping Fan<pingfank@linux.vnet.ibm.**com<pingfank@linux.vnet.ibm.com>
>> >
>> ---
>>  Makefile.target      |    1 +
>>  hw/apic.c            |    7 ++++-
>>  hw/apic.h            |    1 +
>>  hw/icc_bus.c         |   62 ++++++++++++++++++++++++++++++**
>> ++++++++++++++++++++
>>  hw/icc_bus.h         |   24 +++++++++++++++++++
>>  hw/pc.c              |   11 ++++-----
>>  target-i386/cpu.h    |    1 +
>>  target-i386/helper.c |    7 +++++-
>>  target-i386/kvm.c    |    1 -
>>  9 files changed, 105 insertions(+), 10 deletions(-)
>>  create mode 100644 hw/icc_bus.c
>>  create mode 100644 hw/icc_bus.h
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index 9011f28..5607c6d 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -241,6 +241,7 @@ obj-i386-$(CONFIG_KVM) += kvmclock.o
>>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>>  obj-i386-y += testdev.o
>>  obj-i386-y += acpi.o acpi_piix4.o
>> +obj-i386-y += icc_bus.o
>>
>>  obj-i386-y += pcspk.o i8254.o
>>  obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
>> diff --git a/hw/apic.c b/hw/apic.c
>> index 69d6ac5..95a1664 100644
>> --- a/hw/apic.c
>> +++ b/hw/apic.c
>> @@ -24,6 +24,7 @@
>>  #include "sysbus.h"
>>  #include "trace.h"
>>  #include "kvm.h"
>> +#include "icc_bus.h"
>>
>>  /* APIC Local Vector Table */
>>  #define APIC_LVT_TIMER   0
>> @@ -304,11 +305,13 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val)
>>
>>      if (!s)
>>          return;
>> +
>>      if (kvm_enabled()&&  kvm_irqchip_in_kernel())
>>
>>          s->apicbase = val;
>>      else
>>          s->apicbase = (val&  0xfffff000) |
>>              (s->apicbase&  (MSR_IA32_APICBASE_BSP |
>> MSR_IA32_APICBASE_ENABLE));
>>
>> +
>>      /* if disabled, cannot be enabled again */
>>      if (!(val&  MSR_IA32_APICBASE_ENABLE)) {
>>          s->apicbase&= ~MSR_IA32_APICBASE_ENABLE;
>>
>> @@ -1075,7 +1078,7 @@ static const VMStateDescription vmstate_apic = {
>>      }
>>  };
>>
>
> Please don't introduce extra whitespace in unrelated code.
>
> Adopt,

>
>
>> -static void apic_reset(DeviceState *d)
>> +void apic_reset(DeviceState *d)
>>  {
>>      APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
>>      int bsp;
>> @@ -1138,7 +1141,7 @@ static SysBusDeviceInfo apic_info = {
>>
>>  static void apic_register_devices(void)
>>  {
>> -    sysbus_register_withprop(&**apic_info);
>> +    iccbus_register(&apic_info);
>>  }
>>
>>  device_init(apic_register_**devices)
>> diff --git a/hw/apic.h b/hw/apic.h
>> index c857d52..e258efa 100644
>> --- a/hw/apic.h
>> +++ b/hw/apic.h
>> @@ -24,5 +24,6 @@ void apic_sipi(DeviceState *s);
>>  /* pc.c */
>>  int cpu_is_bsp(CPUState *env);
>>  DeviceState *cpu_get_current_apic(void);
>> +void apic_reset(DeviceState *d);
>>
>>  #endif
>> diff --git a/hw/icc_bus.c b/hw/icc_bus.c
>> new file mode 100644
>> index 0000000..360ca2a
>> --- /dev/null
>> +++ b/hw/icc_bus.c
>> @@ -0,0 +1,62 @@
>> +/*
>> +*/
>>
>
> New files need to include copyright/licenses.
>
> Adopt,

 +#define ICC_BUS_PLUG
>> +#ifdef ICC_BUS_PLUG
>>
>
> Drop these guards here and at the end of the file.  We conditionally build
> files by having an:
>
> obj-$(CONFIG_ICC_BUS) += icc_bus.o
>
> In the Makefile.
>
> Adopt,


>  +#include "icc_bus.h"
>>
>
>  +
>> +
>> +
>> +struct icc_bus_info icc_info = {
>> +    .qinfo.name = "icc",
>> +    .qinfo.size = sizeof(struct icc_bus),
>> +    .qinfo.props = (Property[]) {
>> +        DEFINE_PROP_END_OF_LIST(),
>> +    }
>> +
>> +};
>>
>
> Structure name is not following Coding Style.
>
I will fix it.

 +
>> +static const VMStateDescription vmstate_icc_bus = {
>> +    .name = "icc_bus",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .minimum_version_id_old = 1,
>> +    .pre_save = NULL,
>> +    .post_load = NULL,
>> +};
>> +
>> +struct icc_bus *g_iccbus;
>> +
>> +struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
>> +{
>> +    struct icc_bus *bus;
>> +
>> +    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent, name));
>> +    bus->qbus.allow_hotplug = 1; /* Yes, we can */
>> +    bus->qbus.name = "icc";
>> +    vmstate_register(NULL, -1,&vmstate_icc_bus, bus);
>>
>> +    g_iccbus = bus;
>> +    return bus;
>> +}
>> +
>> +
>> +
>> +
>> +
>> +static int iccbus_device_init(DeviceState *dev, DeviceInfo *base)
>> +{
>> +    SysBusDeviceInfo *info = container_of(base, SysBusDeviceInfo, qdev);
>> +
>> +    return info->init(sysbus_from_qdev(**dev));
>> +}
>> +
>> +void iccbus_register(**SysBusDeviceInfo *info)
>> +{
>> +    info->qdev.init = iccbus_device_init;
>> +    info->qdev.bus_info =&icc_info.qinfo;
>> +
>> +    assert(info->qdev.size>= sizeof(SysBusDevice));
>> +    qdev_register(&info->qdev);
>> +}
>> +
>>
>
> It's not obvious to me why you need the g_iccbus variable.
>
>  As ICC bus is used by LAPIC and IOAPIC, so I want to export it as a system
wide variable.


>  +#endif
>> diff --git a/hw/icc_bus.h b/hw/icc_bus.h
>> new file mode 100644
>> index 0000000..94d9242
>> --- /dev/null
>> +++ b/hw/icc_bus.h
>> @@ -0,0 +1,24 @@
>> +#ifndef QEMU_ICC_H
>> +#define QEMU_ICC_H
>>
>
>
> Needs copyright and a blurb explaining what this file is and what the ICC
> bus is.
>
> Same comments re: whitespace below.
>
> Regards,
>
> Anthony Liguori
>
>
>  +#include "qdev.h"
>> +#include "sysbus.h"
>> +
>> +typedef struct icc_bus icc_bus;
>> +typedef struct icc_bus_info icc_bus_info;
>> +
>> +
>> +struct icc_bus {
>> +    BusState qbus;
>> +};
>> +
>> +struct icc_bus_info {
>> +    BusInfo qinfo;
>> +};
>> +
>> +extern struct icc_bus_info icc_info;
>> +extern struct icc_bus *g_iccbus;
>> +extern struct icc_bus *icc_init_bus(DeviceState *parent, const char
>> *name);
>> +extern void iccbus_register(**SysBusDeviceInfo *info);
>>
>
> Functions don't need an 'extern' modifier.  I don't think icc_info needs to
> be exported either.
>
> Instead of exporting g_iccbus, it would be better to have a get_icc_bus()
> method that returned the global iccbus.  That way we can more easily hook
> accesses to the bus.
>
> Adopt,

Thanks and regards,

ping fan





 +#endif
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 6b3662e..10371d8 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -24,6 +24,7 @@
>>  #include "hw.h"
>>  #include "pc.h"
>>  #include "apic.h"
>> +#include "icc_bus.h"
>>  #include "fdc.h"
>>  #include "ide.h"
>>  #include "pci.h"
>> @@ -91,6 +92,7 @@ struct e820_table {
>>  static struct e820_table e820_table;
>>  struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
>>
>> +
>>  void isa_irq_handler(void *opaque, int n, int level)
>>  {
>>      IsaIrqState *isa = (IsaIrqState *)opaque;
>> @@ -872,13 +874,13 @@ DeviceState *cpu_get_current_apic(void)
>>      }
>>  }
>>
>> -static DeviceState *apic_init(void *env, uint8_t apic_id)
>> +DeviceState *apic_init(void *env, uint8_t apic_id)
>>  {
>>      DeviceState *dev;
>>      SysBusDevice *d;
>>      static int apic_mapped;
>>
>> -    dev = qdev_create(NULL, "apic");
>> +    dev = qdev_create(&g_iccbus->qbus, "apic");
>>      qdev_prop_set_uint8(dev, "id", apic_id);
>>      qdev_prop_set_ptr(dev, "cpu_env", env);
>>      qdev_init_nofail(dev);
>> @@ -943,10 +945,6 @@ CPUState *pc_new_cpu(const char *cpu_model)
>>          fprintf(stderr, "Unable to find x86 CPU definition\n");
>>          exit(1);
>>      }
>> -    if ((env->cpuid_features&  CPUID_APIC) || smp_cpus>  1) {
>>
>> -        env->cpuid_apic_id = env->cpu_index;
>> -        env->apic_state = apic_init(env, env->cpuid_apic_id);
>> -    }
>>      qemu_register_reset(pc_cpu_**reset, env);
>>      pc_cpu_reset(env);
>>      return env;
>> @@ -956,6 +954,7 @@ void pc_cpus_init(const char *cpu_model)
>>  {
>>      int i;
>>
>> +    icc_init_bus(NULL, "icc");
>>      /* init CPUs */
>>      for(i = 0; i<  smp_cpus; i++) {
>>          pc_new_cpu(cpu_model);
>> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
>> index 2a071f2..0160c55 100644
>> --- a/target-i386/cpu.h
>> +++ b/target-i386/cpu.h
>> @@ -1062,4 +1062,5 @@ void svm_check_intercept(CPUState *env1, uint32_t
>> type);
>>
>>  uint32_t cpu_cc_compute_all(CPUState *env1, int op);
>>
>> +extern DeviceState *apic_init(void *env, uint8_t apic_id);
>>  #endif /* CPU_I386_H */
>> diff --git a/target-i386/helper.c b/target-i386/helper.c
>> index 5df40d4..551a8a2 100644
>> --- a/target-i386/helper.c
>> +++ b/target-i386/helper.c
>> @@ -30,6 +30,7 @@
>>  #include "monitor.h"
>>  #endif
>>
>> +
>>  //#define DEBUG_MMU
>>
>>  /* NOTE: must be called outside the CPU execute loop */
>> @@ -1257,7 +1258,11 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
>>          return NULL;
>>      }
>>      mce_init(env);
>> -
>> +    if ((env->cpuid_features&  CPUID_APIC)
>>
>> +        || smp_cpus>  1) {
>> +        env->cpuid_apic_id = env->cpu_index;
>> +        env->apic_state = apic_init(env, env->cpuid_apic_id);
>> +    }
>>      qemu_init_vcpu(env);
>>
>>      return env;
>> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
>> index 571d792..407dba6 100644
>> --- a/target-i386/kvm.c
>> +++ b/target-i386/kvm.c
>> @@ -880,7 +880,6 @@ static int kvm_put_sregs(CPUState *env)
>>
>>      sregs.cr8 = cpu_get_apic_tpr(env->apic_**state);
>>      sregs.apic_base = cpu_get_apic_base(env->apic_**state);
>> -
>>      sregs.efer = env->efer;
>>
>>      return kvm_vcpu_ioctl(env, KVM_SET_SREGS,&sregs);
>>
>
>
>
pingfan liu - Oct. 5, 2011, 10:26 a.m.
On Wed, Oct 5, 2011 at 12:43 AM, Jan Kiszka <jan.kiszka@web.de> wrote:

> On 2011-10-04 13:13, pingfank@linux.vnet.com wrote:
> > From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> >
> > Separate apic from qbus to icc bus which supports hotplug feature.
>
> Modeling the ICC bus looks like a step in the right direction. The
> IOAPIC could be attached to it as well to get rid of
> "ioapics[MAX_IOAPICS]".


Yes, but it will be the next step.  But I introduce it because with current
code,
the hotplug creation of apic instance by apic_init() will hit the following
assert
 "qdev_create_from_info: Assertion `bus->allow_hotplug' failed."
So I want to separate apic and create ICC-bus which connects LAPICs
together.
Most of all, it can support hotplug just like we can hotplug x86 physical
CPU in real machine.



>  > And make the creation of apic as part of cpu initialization, so
> > apic's state has been ready, before setting kvm_apic.
>
> There is no kvm-apic upstream yet, so it's hard to judge why we need
> this here. If we do, this has to be a separate patch. But I seriously
> doubt we need it (my hack worked without it, and that was not because of
> its hack nature).
>
> Sorry, I did not explain it clearly. What I mean is that “env->apic_state”
must be prepared
before qemu_kvm_cpu_thread_fn() -> ... -> kvm_put_sregs(), where we get
apic_base by
“ sregs.apic_base = cpu_get_apic_base(env->apic_state);”
and then call “kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);” which will
finally affect the
kvm_apic structure in kernel.

But as current code, in pc_new_cpu(), we call apic_init() to initialize
apic_state, after cpu_init(),
so we can not guarantee the order of apic_state initializaion and the
setting to kernel.

Because LAPIC is part of x86 chip, I want to move it into cpu_x86_init(),
and ensure apic_init()
called before thread “qemu_kvm_cpu_thread_fn()” creation.

 >
> > Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> > ---
> >  Makefile.target      |    1 +
> >  hw/apic.c            |    7 ++++-
> >  hw/apic.h            |    1 +
> >  hw/icc_bus.c         |   62
> ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/icc_bus.h         |   24 +++++++++++++++++++
> >  hw/pc.c              |   11 ++++-----
> >  target-i386/cpu.h    |    1 +
> >  target-i386/helper.c |    7 +++++-
> >  target-i386/kvm.c    |    1 -
> >  9 files changed, 105 insertions(+), 10 deletions(-)
> >  create mode 100644 hw/icc_bus.c
> >  create mode 100644 hw/icc_bus.h
> >
> > diff --git a/Makefile.target b/Makefile.target
> > index 9011f28..5607c6d 100644
> > --- a/Makefile.target
> > +++ b/Makefile.target
> > @@ -241,6 +241,7 @@ obj-i386-$(CONFIG_KVM) += kvmclock.o
> >  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
> >  obj-i386-y += testdev.o
> >  obj-i386-y += acpi.o acpi_piix4.o
> > +obj-i386-y += icc_bus.o
> >
> >  obj-i386-y += pcspk.o i8254.o
> >  obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
> > diff --git a/hw/apic.c b/hw/apic.c
> > index 69d6ac5..95a1664 100644
> > --- a/hw/apic.c
> > +++ b/hw/apic.c
> > @@ -24,6 +24,7 @@
> >  #include "sysbus.h"
> >  #include "trace.h"
> >  #include "kvm.h"
> > +#include "icc_bus.h"
> >
> >  /* APIC Local Vector Table */
> >  #define APIC_LVT_TIMER   0
> > @@ -304,11 +305,13 @@ void cpu_set_apic_base(DeviceState *d, uint64_t
> val)
> >
> >      if (!s)
> >          return;
> > +
> >      if (kvm_enabled() && kvm_irqchip_in_kernel())
> >          s->apicbase = val;
> >      else
> >          s->apicbase = (val & 0xfffff000) |
> >              (s->apicbase & (MSR_IA32_APICBASE_BSP |
> MSR_IA32_APICBASE_ENABLE));
> > +
> >      /* if disabled, cannot be enabled again */
> >      if (!(val & MSR_IA32_APICBASE_ENABLE)) {
> >          s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
> > @@ -1075,7 +1078,7 @@ static const VMStateDescription vmstate_apic = {
> >      }
> >  };
> >
> > -static void apic_reset(DeviceState *d)
> > +void apic_reset(DeviceState *d)
>
> Still unused outside of this file, so keep it private.
>
> >  {
> >      APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
> >      int bsp;
> > @@ -1138,7 +1141,7 @@ static SysBusDeviceInfo apic_info = {
> >
> >  static void apic_register_devices(void)
> >  {
> > -    sysbus_register_withprop(&apic_info);
> > +    iccbus_register(&apic_info);
> >  }
> >
> >  device_init(apic_register_devices)
> > diff --git a/hw/apic.h b/hw/apic.h
> > index c857d52..e258efa 100644
> > --- a/hw/apic.h
> > +++ b/hw/apic.h
> > @@ -24,5 +24,6 @@ void apic_sipi(DeviceState *s);
> >  /* pc.c */
> >  int cpu_is_bsp(CPUState *env);
> >  DeviceState *cpu_get_current_apic(void);
> > +void apic_reset(DeviceState *d);
> >
> >  #endif
> > diff --git a/hw/icc_bus.c b/hw/icc_bus.c
> > new file mode 100644
> > index 0000000..360ca2a
> > --- /dev/null
> > +++ b/hw/icc_bus.c
> > @@ -0,0 +1,62 @@
> > +/*
> > +*/
> > +#define ICC_BUS_PLUG
> > +#ifdef ICC_BUS_PLUG
> > +#include "icc_bus.h"
> > +
> > +
> > +
> > +struct icc_bus_info icc_info = {
> > +    .qinfo.name = "icc",
> > +    .qinfo.size = sizeof(struct icc_bus),
> > +    .qinfo.props = (Property[]) {
> > +        DEFINE_PROP_END_OF_LIST(),
> > +    }
> > +
> > +};
> > +
> > +
> > +static const VMStateDescription vmstate_icc_bus = {
> > +    .name = "icc_bus",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .minimum_version_id_old = 1,
> > +    .pre_save = NULL,
> > +    .post_load = NULL,
> > +};
> > +
> > +struct icc_bus *g_iccbus;
> > +
> > +struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
> > +{
> > +    struct icc_bus *bus;
> > +
> > +    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent,
> name));
> > +    bus->qbus.allow_hotplug = 1; /* Yes, we can */
> > +    bus->qbus.name = "icc";
> > +    vmstate_register(NULL, -1, &vmstate_icc_bus, bus);
>
> The chipset is the owner of this bus and instantiates it. So it also
> provides a vmstate. You can drop this unneeded one here (it's created
> via an obsolete API anyway).
>

No familiar with Qemu bus emulation, keep on learning :) . But what I
thought is,
the x86-ICC bus is not the same as bus like PCI.
For a PCI bus, it lies behind a host bridge, but ICC is shared by all x86
processors in SMP system,
so there is not a outstanding owner.  And I right?

>
> > +    g_iccbus = bus;
> > +    return bus;
> > +}
> > +
> > +
> > +
> > +
> > +
> > +static int iccbus_device_init(DeviceState *dev, DeviceInfo *base)
> > +{
> > +    SysBusDeviceInfo *info = container_of(base, SysBusDeviceInfo, qdev);
> > +
> > +    return info->init(sysbus_from_qdev(dev));
> > +}
> > +
> > +void iccbus_register(SysBusDeviceInfo *info)
> > +{
> > +    info->qdev.init = iccbus_device_init;
> > +    info->qdev.bus_info = &icc_info.qinfo;
> > +
> > +    assert(info->qdev.size >= sizeof(SysBusDevice));
> > +    qdev_register(&info->qdev);
> > +}
>
> This service should be unneeded when the bus is properly embedded into
> its parent (ie. the chipset).
>
> > +
> > +#endif
> > diff --git a/hw/icc_bus.h b/hw/icc_bus.h
> > new file mode 100644
> > index 0000000..94d9242
> > --- /dev/null
> > +++ b/hw/icc_bus.h
> > @@ -0,0 +1,24 @@
> > +#ifndef QEMU_ICC_H
> > +#define QEMU_ICC_H
> > +
> > +#include "qdev.h"
> > +#include "sysbus.h"
> > +
> > +typedef struct icc_bus icc_bus;
> > +typedef struct icc_bus_info icc_bus_info;
> > +
> > +
> > +struct icc_bus {
> > +    BusState qbus;
> > +};
> > +
> > +struct icc_bus_info {
> > +    BusInfo qinfo;
> > +};
> > +
> > +extern struct icc_bus_info icc_info;
> > +extern struct icc_bus *g_iccbus;
> > +extern struct icc_bus *icc_init_bus(DeviceState *parent, const char
> *name);
> > +extern void iccbus_register(SysBusDeviceInfo *info);
> > +
> > +#endif
> > diff --git a/hw/pc.c b/hw/pc.c
> > index 6b3662e..10371d8 100644
> > --- a/hw/pc.c
> > +++ b/hw/pc.c
> > @@ -24,6 +24,7 @@
> >  #include "hw.h"
> >  #include "pc.h"
> >  #include "apic.h"
> > +#include "icc_bus.h"
> >  #include "fdc.h"
> >  #include "ide.h"
> >  #include "pci.h"
> > @@ -91,6 +92,7 @@ struct e820_table {
> >  static struct e820_table e820_table;
> >  struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
> >
> > +
> >  void isa_irq_handler(void *opaque, int n, int level)
> >  {
> >      IsaIrqState *isa = (IsaIrqState *)opaque;
> > @@ -872,13 +874,13 @@ DeviceState *cpu_get_current_apic(void)
> >      }
> >  }
> >
> > -static DeviceState *apic_init(void *env, uint8_t apic_id)
> > +DeviceState *apic_init(void *env, uint8_t apic_id)
> >  {
> >      DeviceState *dev;
> >      SysBusDevice *d;
> >      static int apic_mapped;
> >
> > -    dev = qdev_create(NULL, "apic");
> > +    dev = qdev_create(&g_iccbus->qbus, "apic");
> >      qdev_prop_set_uint8(dev, "id", apic_id);
> >      qdev_prop_set_ptr(dev, "cpu_env", env);
> >      qdev_init_nofail(dev);
> > @@ -943,10 +945,6 @@ CPUState *pc_new_cpu(const char *cpu_model)
> >          fprintf(stderr, "Unable to find x86 CPU definition\n");
> >          exit(1);
> >      }
> > -    if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
> > -        env->cpuid_apic_id = env->cpu_index;
> > -        env->apic_state = apic_init(env, env->cpuid_apic_id);
> > -    }
> >      qemu_register_reset(pc_cpu_reset, env);
> >      pc_cpu_reset(env);
> >      return env;
> > @@ -956,6 +954,7 @@ void pc_cpus_init(const char *cpu_model)
> >  {
> >      int i;
> >
> > +    icc_init_bus(NULL, "icc");
> >      /* init CPUs */
> >      for(i = 0; i < smp_cpus; i++) {
> >          pc_new_cpu(cpu_model);
> > diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> > index 2a071f2..0160c55 100644
> > --- a/target-i386/cpu.h
> > +++ b/target-i386/cpu.h
> > @@ -1062,4 +1062,5 @@ void svm_check_intercept(CPUState *env1, uint32_t
> type);
> >
> >  uint32_t cpu_cc_compute_all(CPUState *env1, int op);
> >
> > +extern DeviceState *apic_init(void *env, uint8_t apic_id);
> >  #endif /* CPU_I386_H */
> > diff --git a/target-i386/helper.c b/target-i386/helper.c
> > index 5df40d4..551a8a2 100644
> > --- a/target-i386/helper.c
> > +++ b/target-i386/helper.c
> > @@ -30,6 +30,7 @@
> >  #include "monitor.h"
> >  #endif
> >
> > +
> >  //#define DEBUG_MMU
> >
> >  /* NOTE: must be called outside the CPU execute loop */
> > @@ -1257,7 +1258,11 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
> >          return NULL;
> >      }
> >      mce_init(env);
> > -
> > +    if ((env->cpuid_features & CPUID_APIC)
> > +        || smp_cpus > 1) {
> > +        env->cpuid_apic_id = env->cpu_index;
> > +        env->apic_state = apic_init(env, env->cpuid_apic_id);
> > +    }
> >      qemu_init_vcpu(env);
> >
> >      return env;
> > diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> > index 571d792..407dba6 100644
> > --- a/target-i386/kvm.c
> > +++ b/target-i386/kvm.c
> > @@ -880,7 +880,6 @@ static int kvm_put_sregs(CPUState *env)
> >
> >      sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
> >      sregs.apic_base = cpu_get_apic_base(env->apic_state);
> > -
> >      sregs.efer = env->efer;
> >
> >      return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
>
> Would be good to see the full series, i.e. everything that is required
> to make CPU hotplug work. First for QEMU upstream without in-kernel
> irqchips and then the qemu-kvm bits.
>
> OK, I will try that.

Thanks and regards,
ping fan



> Jan
>
>
Jan Kiszka - Oct. 5, 2011, 11:01 a.m.
On 2011-10-05 12:26, liu ping fan wrote:
>>  > And make the creation of apic as part of cpu initialization, so
>>> apic's state has been ready, before setting kvm_apic.
>>
>> There is no kvm-apic upstream yet, so it's hard to judge why we need
>> this here. If we do, this has to be a separate patch. But I seriously
>> doubt we need it (my hack worked without it, and that was not because of
>> its hack nature).
>>
>> Sorry, I did not explain it clearly. What I mean is that “env->apic_state”
> must be prepared
> before qemu_kvm_cpu_thread_fn() -> ... -> kvm_put_sregs(), where we get
> apic_base by
> “ sregs.apic_base = cpu_get_apic_base(env->apic_state);”
> and then call “kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);” which will
> finally affect the
> kvm_apic structure in kernel.
> 
> But as current code, in pc_new_cpu(), we call apic_init() to initialize
> apic_state, after cpu_init(),
> so we can not guarantee the order of apic_state initializaion and the
> setting to kernel.
> 
> Because LAPIC is part of x86 chip, I want to move it into cpu_x86_init(),
> and ensure apic_init()
> called before thread “qemu_kvm_cpu_thread_fn()” creation.

The LAPIC is part of the CPU, the classic APIC was a dedicated chip.

For various reasons, a safer approach for creating a new CPU is to stop
the machine, add the new device models, run cpu_synchronize_post_init on
that new cpu (looks like you missed that) and then resume everything.
See
http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=be8f21c6b54eac82f7add7ee9d4ecf9cb8ebb320

...
>>> diff --git a/hw/icc_bus.c b/hw/icc_bus.c
>>> new file mode 100644
>>> index 0000000..360ca2a
>>> --- /dev/null
>>> +++ b/hw/icc_bus.c
>>> @@ -0,0 +1,62 @@
>>> +/*
>>> +*/
>>> +#define ICC_BUS_PLUG
>>> +#ifdef ICC_BUS_PLUG
>>> +#include "icc_bus.h"
>>> +
>>> +
>>> +
>>> +struct icc_bus_info icc_info = {
>>> +    .qinfo.name = "icc",
>>> +    .qinfo.size = sizeof(struct icc_bus),
>>> +    .qinfo.props = (Property[]) {
>>> +        DEFINE_PROP_END_OF_LIST(),
>>> +    }
>>> +
>>> +};
>>> +
>>> +
>>> +static const VMStateDescription vmstate_icc_bus = {
>>> +    .name = "icc_bus",
>>> +    .version_id = 1,
>>> +    .minimum_version_id = 1,
>>> +    .minimum_version_id_old = 1,
>>> +    .pre_save = NULL,
>>> +    .post_load = NULL,
>>> +};
>>> +
>>> +struct icc_bus *g_iccbus;
>>> +
>>> +struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
>>> +{
>>> +    struct icc_bus *bus;
>>> +
>>> +    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent,
>> name));
>>> +    bus->qbus.allow_hotplug = 1; /* Yes, we can */
>>> +    bus->qbus.name = "icc";
>>> +    vmstate_register(NULL, -1, &vmstate_icc_bus, bus);
>>
>> The chipset is the owner of this bus and instantiates it. So it also
>> provides a vmstate. You can drop this unneeded one here (it's created
>> via an obsolete API anyway).
>>
> 
> No familiar with Qemu bus emulation, keep on learning :) . But what I
> thought is,
> the x86-ICC bus is not the same as bus like PCI.
> For a PCI bus, it lies behind a host bridge, but ICC is shared by all x86
> processors in SMP system,
> so there is not a outstanding owner.  And I right?

ICC is also attached to the chipset (due to the IOAPIC). So it looks
reasonable to me to let the chipset do the lifecycle management as well.
It is the fixed point, CPUs may come and go.

Jan
pingfan liu - Oct. 6, 2011, 1:13 a.m.
On Wed, Oct 5, 2011 at 7:01 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2011-10-05 12:26, liu ping fan wrote:
>>>  > And make the creation of apic as part of cpu initialization, so
>>>> apic's state has been ready, before setting kvm_apic.
>>>
>>> There is no kvm-apic upstream yet, so it's hard to judge why we need
>>> this here. If we do, this has to be a separate patch. But I seriously
>>> doubt we need it (my hack worked without it, and that was not because of
>>> its hack nature).
>>>
>>> Sorry, I did not explain it clearly. What I mean is that “env->apic_state”
>> must be prepared
>> before qemu_kvm_cpu_thread_fn() -> ... -> kvm_put_sregs(), where we get
>> apic_base by
>> “ sregs.apic_base = cpu_get_apic_base(env->apic_state);”
>> and then call “kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);” which will
>> finally affect the
>> kvm_apic structure in kernel.
>>
>> But as current code, in pc_new_cpu(), we call apic_init() to initialize
>> apic_state, after cpu_init(),
>> so we can not guarantee the order of apic_state initializaion and the
>> setting to kernel.
>>
>> Because LAPIC is part of x86 chip, I want to move it into cpu_x86_init(),
>> and ensure apic_init()
>> called before thread “qemu_kvm_cpu_thread_fn()” creation.
>
> The LAPIC is part of the CPU, the classic APIC was a dedicated chip.
Sorry, a little puzzle.  I think x86 interrupt system consists of two
parts: IOAPIC/LAPIC.
For we have "hw/ioapic.c" to simulate IOAPIC,  I think "hw/apic.c"
takes the role as LAPIC,
especially that we create an APICState instance for each CPUX86State,
just like each LAPIC
for x86 CPU in real machine.
So we can consider apic_init() to create a LAPIC instance, rather than
create a  "classic APIC"?

I guess If there is lack of something in IOAPIC/LAPIC bus topology,
that will be the arbitrator of ICC bus, right?
So "the classic APIC was a dedicated chip" what you said, play this
role,  right?
Could you tell me a sample chipset of APIC, and I can increase my
knowledge about it, thanks.

>
> For various reasons, a safer approach for creating a new CPU is to stop
> the machine, add the new device models, run cpu_synchronize_post_init on
> that new cpu (looks like you missed that) and then resume everything.
> See
> http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=be8f21c6b54eac82f7add7ee9d4ecf9cb8ebb320
>
Great job. And I am interesting about it. Could you give some sample
reason about why we need to stop
the machine, or list all of the reasons, so we can resolve it one by
one. I can not figure out such scenes by myself.
From my view, especially for KVM, the creation of vcpu are protected
well by lock mechanism from other
vcpu threads in kernel, so we need not to stop all of the threads.

Hope for suggestion and direct from you, and make a further step for
CPU hot-plug feature,

Thanks and regards,
ping fan


> ...
>>>> diff --git a/hw/icc_bus.c b/hw/icc_bus.c
>>>> new file mode 100644
>>>> index 0000000..360ca2a
>>>> --- /dev/null
>>>> +++ b/hw/icc_bus.c
>>>> @@ -0,0 +1,62 @@
>>>> +/*
>>>> +*/
>>>> +#define ICC_BUS_PLUG
>>>> +#ifdef ICC_BUS_PLUG
>>>> +#include "icc_bus.h"
>>>> +
>>>> +
>>>> +
>>>> +struct icc_bus_info icc_info = {
>>>> +    .qinfo.name = "icc",
>>>> +    .qinfo.size = sizeof(struct icc_bus),
>>>> +    .qinfo.props = (Property[]) {
>>>> +        DEFINE_PROP_END_OF_LIST(),
>>>> +    }
>>>> +
>>>> +};
>>>> +
>>>> +
>>>> +static const VMStateDescription vmstate_icc_bus = {
>>>> +    .name = "icc_bus",
>>>> +    .version_id = 1,
>>>> +    .minimum_version_id = 1,
>>>> +    .minimum_version_id_old = 1,
>>>> +    .pre_save = NULL,
>>>> +    .post_load = NULL,
>>>> +};
>>>> +
>>>> +struct icc_bus *g_iccbus;
>>>> +
>>>> +struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
>>>> +{
>>>> +    struct icc_bus *bus;
>>>> +
>>>> +    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent,
>>> name));
>>>> +    bus->qbus.allow_hotplug = 1; /* Yes, we can */
>>>> +    bus->qbus.name = "icc";
>>>> +    vmstate_register(NULL, -1, &vmstate_icc_bus, bus);
>>>
>>> The chipset is the owner of this bus and instantiates it. So it also
>>> provides a vmstate. You can drop this unneeded one here (it's created
>>> via an obsolete API anyway).
>>>
>>
>> No familiar with Qemu bus emulation, keep on learning :) . But what I
>> thought is,
>> the x86-ICC bus is not the same as bus like PCI.
>> For a PCI bus, it lies behind a host bridge, but ICC is shared by all x86
>> processors in SMP system,
>> so there is not a outstanding owner.  And I right?
>
> ICC is also attached to the chipset (due to the IOAPIC). So it looks
> reasonable to me to let the chipset do the lifecycle management as well.
> It is the fixed point, CPUs may come and go.
>
> Jan
>
>
Anthony Liguori - Oct. 6, 2011, 1:50 a.m.
On 10/05/2011 08:13 PM, liu ping fan wrote:
> On Wed, Oct 5, 2011 at 7:01 PM, Jan Kiszka<jan.kiszka@web.de>  wrote:
>> On 2011-10-05 12:26, liu ping fan wrote:
>>>>   >  And make the creation of apic as part of cpu initialization, so
>>>>> apic's state has been ready, before setting kvm_apic.
>>>>
>>>> There is no kvm-apic upstream yet, so it's hard to judge why we need
>>>> this here. If we do, this has to be a separate patch. But I seriously
>>>> doubt we need it (my hack worked without it, and that was not because of
>>>> its hack nature).
>>>>
>>>> Sorry, I did not explain it clearly. What I mean is that “env->apic_state”
>>> must be prepared
>>> before qemu_kvm_cpu_thread_fn() ->  ... ->  kvm_put_sregs(), where we get
>>> apic_base by
>>> “ sregs.apic_base = cpu_get_apic_base(env->apic_state);”
>>> and then call “kvm_vcpu_ioctl(env, KVM_SET_SREGS,&sregs);” which will
>>> finally affect the
>>> kvm_apic structure in kernel.
>>>
>>> But as current code, in pc_new_cpu(), we call apic_init() to initialize
>>> apic_state, after cpu_init(),
>>> so we can not guarantee the order of apic_state initializaion and the
>>> setting to kernel.
>>>
>>> Because LAPIC is part of x86 chip, I want to move it into cpu_x86_init(),
>>> and ensure apic_init()
>>> called before thread “qemu_kvm_cpu_thread_fn()” creation.
>>
>> The LAPIC is part of the CPU, the classic APIC was a dedicated chip.
> Sorry, a little puzzle.  I think x86 interrupt system consists of two
> parts: IOAPIC/LAPIC.
> For we have "hw/ioapic.c" to simulate IOAPIC,  I think "hw/apic.c"
> takes the role as LAPIC,
> especially that we create an APICState instance for each CPUX86State,
> just like each LAPIC
> for x86 CPU in real machine.
> So we can consider apic_init() to create a LAPIC instance, rather than
> create a  "classic APIC"?
>
> I guess If there is lack of something in IOAPIC/LAPIC bus topology,
> that will be the arbitrator of ICC bus, right?
> So "the classic APIC was a dedicated chip" what you said, play this
> role,  right?
> Could you tell me a sample chipset of APIC, and I can increase my
> knowledge about it, thanks.

I think Jan meant the PIC was a dedicated chip.  hw/apic.c implements an LAPIC, 
hw/i8259.c implements an I8259A otherwise known as the PIC.  hw/ioapic.c 
implements an I/O APIC.

Together, the I/O APIC and the LAPIC form an 'APIC Architecture'.  Usually, the 
legacy PIC can hang off of the BSP LAPIC.

Regards,

Anthony Liguori

>
>>
>> For various reasons, a safer approach for creating a new CPU is to stop
>> the machine, add the new device models, run cpu_synchronize_post_init on
>> that new cpu (looks like you missed that) and then resume everything.
>> See
>> http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=be8f21c6b54eac82f7add7ee9d4ecf9cb8ebb320
>>
> Great job. And I am interesting about it. Could you give some sample
> reason about why we need to stop
> the machine, or list all of the reasons, so we can resolve it one by
> one. I can not figure out such scenes by myself.
>> From my view, especially for KVM, the creation of vcpu are protected
> well by lock mechanism from other
> vcpu threads in kernel, so we need not to stop all of the threads.
>
> Hope for suggestion and direct from you, and make a further step for
> CPU hot-plug feature,
>
> Thanks and regards,
> ping fan
>
>
>> ...
>>>>> diff --git a/hw/icc_bus.c b/hw/icc_bus.c
>>>>> new file mode 100644
>>>>> index 0000000..360ca2a
>>>>> --- /dev/null
>>>>> +++ b/hw/icc_bus.c
>>>>> @@ -0,0 +1,62 @@
>>>>> +/*
>>>>> +*/
>>>>> +#define ICC_BUS_PLUG
>>>>> +#ifdef ICC_BUS_PLUG
>>>>> +#include "icc_bus.h"
>>>>> +
>>>>> +
>>>>> +
>>>>> +struct icc_bus_info icc_info = {
>>>>> +    .qinfo.name = "icc",
>>>>> +    .qinfo.size = sizeof(struct icc_bus),
>>>>> +    .qinfo.props = (Property[]) {
>>>>> +        DEFINE_PROP_END_OF_LIST(),
>>>>> +    }
>>>>> +
>>>>> +};
>>>>> +
>>>>> +
>>>>> +static const VMStateDescription vmstate_icc_bus = {
>>>>> +    .name = "icc_bus",
>>>>> +    .version_id = 1,
>>>>> +    .minimum_version_id = 1,
>>>>> +    .minimum_version_id_old = 1,
>>>>> +    .pre_save = NULL,
>>>>> +    .post_load = NULL,
>>>>> +};
>>>>> +
>>>>> +struct icc_bus *g_iccbus;
>>>>> +
>>>>> +struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
>>>>> +{
>>>>> +    struct icc_bus *bus;
>>>>> +
>>>>> +    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent,
>>>> name));
>>>>> +    bus->qbus.allow_hotplug = 1; /* Yes, we can */
>>>>> +    bus->qbus.name = "icc";
>>>>> +    vmstate_register(NULL, -1,&vmstate_icc_bus, bus);
>>>>
>>>> The chipset is the owner of this bus and instantiates it. So it also
>>>> provides a vmstate. You can drop this unneeded one here (it's created
>>>> via an obsolete API anyway).
>>>>
>>>
>>> No familiar with Qemu bus emulation, keep on learning :) . But what I
>>> thought is,
>>> the x86-ICC bus is not the same as bus like PCI.
>>> For a PCI bus, it lies behind a host bridge, but ICC is shared by all x86
>>> processors in SMP system,
>>> so there is not a outstanding owner.  And I right?
>>
>> ICC is also attached to the chipset (due to the IOAPIC). So it looks
>> reasonable to me to let the chipset do the lifecycle management as well.
>> It is the fixed point, CPUs may come and go.
>>
>> Jan
>>
>>
>
Jan Kiszka - Oct. 6, 2011, 6:58 a.m.
On 2011-10-06 03:13, liu ping fan wrote:
> On Wed, Oct 5, 2011 at 7:01 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
>> On 2011-10-05 12:26, liu ping fan wrote:
>>>>  > And make the creation of apic as part of cpu initialization, so
>>>>> apic's state has been ready, before setting kvm_apic.
>>>>
>>>> There is no kvm-apic upstream yet, so it's hard to judge why we need
>>>> this here. If we do, this has to be a separate patch. But I seriously
>>>> doubt we need it (my hack worked without it, and that was not because of
>>>> its hack nature).
>>>>
>>>> Sorry, I did not explain it clearly. What I mean is that “env->apic_state”
>>> must be prepared
>>> before qemu_kvm_cpu_thread_fn() -> ... -> kvm_put_sregs(), where we get
>>> apic_base by
>>> “ sregs.apic_base = cpu_get_apic_base(env->apic_state);”
>>> and then call “kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);” which will
>>> finally affect the
>>> kvm_apic structure in kernel.
>>>
>>> But as current code, in pc_new_cpu(), we call apic_init() to initialize
>>> apic_state, after cpu_init(),
>>> so we can not guarantee the order of apic_state initializaion and the
>>> setting to kernel.
>>>
>>> Because LAPIC is part of x86 chip, I want to move it into cpu_x86_init(),
>>> and ensure apic_init()
>>> called before thread “qemu_kvm_cpu_thread_fn()” creation.
>>
>> The LAPIC is part of the CPU, the classic APIC was a dedicated chip.
> Sorry, a little puzzle.  I think x86 interrupt system consists of two
> parts: IOAPIC/LAPIC.
> For we have "hw/ioapic.c" to simulate IOAPIC,  I think "hw/apic.c"
> takes the role as LAPIC,
> especially that we create an APICState instance for each CPUX86State,
> just like each LAPIC
> for x86 CPU in real machine.
> So we can consider apic_init() to create a LAPIC instance, rather than
> create a  "classic APIC"?
> 
> I guess If there is lack of something in IOAPIC/LAPIC bus topology,
> that will be the arbitrator of ICC bus, right?
> So "the classic APIC was a dedicated chip" what you said, play this
> role,  right?
> Could you tell me a sample chipset of APIC, and I can increase my
> knowledge about it, thanks.

The 82489DX was used as a discrete APIC on 486 SMP systems.

> 
>>
>> For various reasons, a safer approach for creating a new CPU is to stop
>> the machine, add the new device models, run cpu_synchronize_post_init on
>> that new cpu (looks like you missed that) and then resume everything.
>> See
>> http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=be8f21c6b54eac82f7add7ee9d4ecf9cb8ebb320
>>
> Great job. And I am interesting about it. Could you give some sample
> reason about why we need to stop
> the machine, or list all of the reasons, so we can resolve it one by
> one. I can not figure out such scenes by myself.
> From my view, especially for KVM, the creation of vcpu are protected
> well by lock mechanism from other
> vcpu threads in kernel, so we need not to stop all of the threads.

Maybe I was seeing ghosts: I thought that there is a race window between
VCPU_CREATE and the last initialization IOCTL when we allow other VCPUs
to interact with the new one already. However, I do not find the
scenario again ATM.

But if you want to move the VCPU resource initialization completely over
the VCPU thread, you also have to handle env->halted in that context.
See [1] for this topic and associated todos.

And don't forget the cpu_synchronize_post_init. Running this after each
VCPU creation directly should also obsolete cpu_synchronize_all_post_init.

Jan

[1] http://thread.gmane.org/gmane.comp.emulators.qemu/100806
pingfan liu - Oct. 7, 2011, 12:54 p.m.
On 10/6/11, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2011-10-06 03:13, liu ping fan wrote:
>> On Wed, Oct 5, 2011 at 7:01 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
>>> On 2011-10-05 12:26, liu ping fan wrote:
>>>>>  > And make the creation of apic as part of cpu initialization, so
>>>>>> apic's state has been ready, before setting kvm_apic.
>>>>>
>>>>> There is no kvm-apic upstream yet, so it's hard to judge why we need
>>>>> this here. If we do, this has to be a separate patch. But I seriously
>>>>> doubt we need it (my hack worked without it, and that was not because
>>>>> of
>>>>> its hack nature).
>>>>>
>>>>> Sorry, I did not explain it clearly. What I mean is that
>>>>> “env->apic_state”
>>>> must be prepared
>>>> before qemu_kvm_cpu_thread_fn() -> ... -> kvm_put_sregs(), where we get
>>>> apic_base by
>>>> “ sregs.apic_base = cpu_get_apic_base(env->apic_state);”
>>>> and then call “kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);” which will
>>>> finally affect the
>>>> kvm_apic structure in kernel.
>>>>
>>>> But as current code, in pc_new_cpu(), we call apic_init() to initialize
>>>> apic_state, after cpu_init(),
>>>> so we can not guarantee the order of apic_state initializaion and the
>>>> setting to kernel.
>>>>
>>>> Because LAPIC is part of x86 chip, I want to move it into
>>>> cpu_x86_init(),
>>>> and ensure apic_init()
>>>> called before thread “qemu_kvm_cpu_thread_fn()” creation.
>>>
>>> The LAPIC is part of the CPU, the classic APIC was a dedicated chip.
>> Sorry, a little puzzle.  I think x86 interrupt system consists of two
>> parts: IOAPIC/LAPIC.
>> For we have "hw/ioapic.c" to simulate IOAPIC,  I think "hw/apic.c"
>> takes the role as LAPIC,
>> especially that we create an APICState instance for each CPUX86State,
>> just like each LAPIC
>> for x86 CPU in real machine.
>> So we can consider apic_init() to create a LAPIC instance, rather than
>> create a  "classic APIC"?
>>
>> I guess If there is lack of something in IOAPIC/LAPIC bus topology,
>> that will be the arbitrator of ICC bus, right?
>> So "the classic APIC was a dedicated chip" what you said, play this
>> role,  right?
>> Could you tell me a sample chipset of APIC, and I can increase my
>> knowledge about it, thanks.
>
> The 82489DX was used as a discrete APIC on 486 SMP systems.
>
>>
>>>
>>> For various reasons, a safer approach for creating a new CPU is to stop
>>> the machine, add the new device models, run cpu_synchronize_post_init on
>>> that new cpu (looks like you missed that) and then resume everything.
>>> See
>>> http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=be8f21c6b54eac82f7add7ee9d4ecf9cb8ebb320
>>>
>> Great job. And I am interesting about it. Could you give some sample
>> reason about why we need to stop
>> the machine, or list all of the reasons, so we can resolve it one by
>> one. I can not figure out such scenes by myself.
>> From my view, especially for KVM, the creation of vcpu are protected
>> well by lock mechanism from other
>> vcpu threads in kernel, so we need not to stop all of the threads.
>
> Maybe I was seeing ghosts: I thought that there is a race window between
> VCPU_CREATE and the last initialization IOCTL when we allow other VCPUs
> to interact with the new one already. However, I do not find the
> scenario again ATM.
>
> But if you want to move the VCPU resource initialization completely over
> the VCPU thread, you also have to handle env->halted in that context.
> See [1] for this topic and associated todos.
>
> And don't forget the cpu_synchronize_post_init. Running this after each
> VCPU creation directly should also obsolete cpu_synchronize_all_post_init.
Thanks, Jan.  I will dig into this and follow the thread to see what
to do in next
step

Regards,
ping fan
>
> Jan
>
> [1] http://thread.gmane.org/gmane.comp.emulators.qemu/100806
>
>

Patch

diff --git a/Makefile.target b/Makefile.target
index 9011f28..5607c6d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -241,6 +241,7 @@  obj-i386-$(CONFIG_KVM) += kvmclock.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 obj-i386-y += testdev.o
 obj-i386-y += acpi.o acpi_piix4.o
+obj-i386-y += icc_bus.o
 
 obj-i386-y += pcspk.o i8254.o
 obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
diff --git a/hw/apic.c b/hw/apic.c
index 69d6ac5..95a1664 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -24,6 +24,7 @@ 
 #include "sysbus.h"
 #include "trace.h"
 #include "kvm.h"
+#include "icc_bus.h"
 
 /* APIC Local Vector Table */
 #define APIC_LVT_TIMER   0
@@ -304,11 +305,13 @@  void cpu_set_apic_base(DeviceState *d, uint64_t val)
 
     if (!s)
         return;
+
     if (kvm_enabled() && kvm_irqchip_in_kernel())
         s->apicbase = val;
     else
         s->apicbase = (val & 0xfffff000) |
             (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
+
     /* if disabled, cannot be enabled again */
     if (!(val & MSR_IA32_APICBASE_ENABLE)) {
         s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
@@ -1075,7 +1078,7 @@  static const VMStateDescription vmstate_apic = {
     }
 };
 
-static void apic_reset(DeviceState *d)
+void apic_reset(DeviceState *d)
 {
     APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
     int bsp;
@@ -1138,7 +1141,7 @@  static SysBusDeviceInfo apic_info = {
 
 static void apic_register_devices(void)
 {
-    sysbus_register_withprop(&apic_info);
+    iccbus_register(&apic_info);
 }
 
 device_init(apic_register_devices)
diff --git a/hw/apic.h b/hw/apic.h
index c857d52..e258efa 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -24,5 +24,6 @@  void apic_sipi(DeviceState *s);
 /* pc.c */
 int cpu_is_bsp(CPUState *env);
 DeviceState *cpu_get_current_apic(void);
+void apic_reset(DeviceState *d);
 
 #endif
diff --git a/hw/icc_bus.c b/hw/icc_bus.c
new file mode 100644
index 0000000..360ca2a
--- /dev/null
+++ b/hw/icc_bus.c
@@ -0,0 +1,62 @@ 
+/*
+*/
+#define ICC_BUS_PLUG
+#ifdef ICC_BUS_PLUG
+#include "icc_bus.h"
+
+
+
+struct icc_bus_info icc_info = {
+    .qinfo.name = "icc",
+    .qinfo.size = sizeof(struct icc_bus),
+    .qinfo.props = (Property[]) {
+        DEFINE_PROP_END_OF_LIST(),
+    }
+
+};
+
+
+static const VMStateDescription vmstate_icc_bus = {
+    .name = "icc_bus",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = NULL,
+    .post_load = NULL,
+};
+
+struct icc_bus *g_iccbus;
+
+struct icc_bus *icc_init_bus(DeviceState *parent, const char *name)
+{
+    struct icc_bus *bus;
+
+    bus = FROM_QBUS(icc_bus, qbus_create(&icc_info.qinfo, parent, name));
+    bus->qbus.allow_hotplug = 1; /* Yes, we can */
+    bus->qbus.name = "icc";
+    vmstate_register(NULL, -1, &vmstate_icc_bus, bus);
+    g_iccbus = bus;
+    return bus;
+}
+
+
+
+
+
+static int iccbus_device_init(DeviceState *dev, DeviceInfo *base)
+{
+    SysBusDeviceInfo *info = container_of(base, SysBusDeviceInfo, qdev);
+
+    return info->init(sysbus_from_qdev(dev));
+}
+
+void iccbus_register(SysBusDeviceInfo *info)
+{
+    info->qdev.init = iccbus_device_init;
+    info->qdev.bus_info = &icc_info.qinfo;
+
+    assert(info->qdev.size >= sizeof(SysBusDevice));
+    qdev_register(&info->qdev);
+}
+
+#endif
diff --git a/hw/icc_bus.h b/hw/icc_bus.h
new file mode 100644
index 0000000..94d9242
--- /dev/null
+++ b/hw/icc_bus.h
@@ -0,0 +1,24 @@ 
+#ifndef QEMU_ICC_H
+#define QEMU_ICC_H
+
+#include "qdev.h"
+#include "sysbus.h"
+
+typedef struct icc_bus icc_bus;
+typedef struct icc_bus_info icc_bus_info;
+
+
+struct icc_bus {
+    BusState qbus;
+};
+
+struct icc_bus_info {
+    BusInfo qinfo;
+};
+
+extern struct icc_bus_info icc_info;
+extern struct icc_bus *g_iccbus;
+extern struct icc_bus *icc_init_bus(DeviceState *parent, const char *name);
+extern void iccbus_register(SysBusDeviceInfo *info);
+
+#endif
diff --git a/hw/pc.c b/hw/pc.c
index 6b3662e..10371d8 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -24,6 +24,7 @@ 
 #include "hw.h"
 #include "pc.h"
 #include "apic.h"
+#include "icc_bus.h"
 #include "fdc.h"
 #include "ide.h"
 #include "pci.h"
@@ -91,6 +92,7 @@  struct e820_table {
 static struct e820_table e820_table;
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
+
 void isa_irq_handler(void *opaque, int n, int level)
 {
     IsaIrqState *isa = (IsaIrqState *)opaque;
@@ -872,13 +874,13 @@  DeviceState *cpu_get_current_apic(void)
     }
 }
 
-static DeviceState *apic_init(void *env, uint8_t apic_id)
+DeviceState *apic_init(void *env, uint8_t apic_id)
 {
     DeviceState *dev;
     SysBusDevice *d;
     static int apic_mapped;
 
-    dev = qdev_create(NULL, "apic");
+    dev = qdev_create(&g_iccbus->qbus, "apic");
     qdev_prop_set_uint8(dev, "id", apic_id);
     qdev_prop_set_ptr(dev, "cpu_env", env);
     qdev_init_nofail(dev);
@@ -943,10 +945,6 @@  CPUState *pc_new_cpu(const char *cpu_model)
         fprintf(stderr, "Unable to find x86 CPU definition\n");
         exit(1);
     }
-    if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
-        env->cpuid_apic_id = env->cpu_index;
-        env->apic_state = apic_init(env, env->cpuid_apic_id);
-    }
     qemu_register_reset(pc_cpu_reset, env);
     pc_cpu_reset(env);
     return env;
@@ -956,6 +954,7 @@  void pc_cpus_init(const char *cpu_model)
 {
     int i;
 
+    icc_init_bus(NULL, "icc");
     /* init CPUs */
     for(i = 0; i < smp_cpus; i++) {
         pc_new_cpu(cpu_model);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 2a071f2..0160c55 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1062,4 +1062,5 @@  void svm_check_intercept(CPUState *env1, uint32_t type);
 
 uint32_t cpu_cc_compute_all(CPUState *env1, int op);
 
+extern DeviceState *apic_init(void *env, uint8_t apic_id);
 #endif /* CPU_I386_H */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 5df40d4..551a8a2 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -30,6 +30,7 @@ 
 #include "monitor.h"
 #endif
 
+
 //#define DEBUG_MMU
 
 /* NOTE: must be called outside the CPU execute loop */
@@ -1257,7 +1258,11 @@  CPUX86State *cpu_x86_init(const char *cpu_model)
         return NULL;
     }
     mce_init(env);
-
+    if ((env->cpuid_features & CPUID_APIC)
+        || smp_cpus > 1) {
+        env->cpuid_apic_id = env->cpu_index;
+        env->apic_state = apic_init(env, env->cpuid_apic_id);
+    }
     qemu_init_vcpu(env);
 
     return env;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 571d792..407dba6 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -880,7 +880,6 @@  static int kvm_put_sregs(CPUState *env)
 
     sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
     sregs.apic_base = cpu_get_apic_base(env->apic_state);
-
     sregs.efer = env->efer;
 
     return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);