Patchwork [10/21] acpi_piix4: add infrastructure to send CPU hot-plug GPE to guest

login
register
mail settings
Submitter Igor Mammedov
Date April 23, 2013, 8:29 a.m.
Message ID <1366705795-24732-11-git-send-email-imammedo@redhat.com>
Download mbox | patch
Permalink /patch/238798/
State New
Headers show

Comments

Igor Mammedov - April 23, 2013, 8:29 a.m.
* introduce processor status bitmask visible to guest at 0xaf00 addr,
  where ACPI asl code expects it
* set bit corresponding to APIC ID in processor status bitmask on
  receiving CPU hot-plug notification
* trigger CPU hot-plug SCI, to notify guest about CPU hot-plug event

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
v4:
  * added spec for QEMU-Seabios interface
  * added PIIX4_ prefix to PROC_ defines
v3:
  * s/get_firmware_id()/get_arch_id()/ due to rebase
  * s/cpu_add_notifier/cpu_added_notifier/
v2:
  * use CPUClass.get_firmware_id() to make code target independent
  * bump up vmstate_acpi version
---
 docs/specs/acpi_cpu_hotplug.txt |   22 +++++++
 hw/acpi/piix4.c                 |  117 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 136 insertions(+), 3 deletions(-)
 create mode 100644 docs/specs/acpi_cpu_hotplug.txt
Juan Quintela - April 23, 2013, 11:38 a.m.
Igor Mammedov <imammedo@redhat.com> wrote:

>  
> +#define VMSTATE_CPU_STATUS_ARRAY(_field, _state)                             \
> + {                                                                           \
> +     .name       = (stringify(_field)),                                      \
> +     .version_id = 0,
> \

this line should be:
   .version_id = 4, 


> +     .num        = PIIX4_PROC_LEN,                                           \
> +     .info       = &vmstate_info_uint8,                                      \
> +     .size       = sizeof(uint8_t),                                          \
> +     .flags      = VMS_ARRAY,                                                \
> +     .offset     = vmstate_offset_array(_state, _field, uint8_t,             \
> +                                        PIIX4_PROC_LEN),                     \
> + }
> +
>  /* qemu-kvm 1.2 uses version 3 but advertised as 2
>   * To support incoming qemu-kvm 1.2 migration, change version_id
>   * and minimum_version_id to 2 below (which breaks migration from

> @@ -265,7 +289,7 @@ static int acpi_load_old(QEMUFile *f, void *opaque, int version_id)
>   */
>  static const VMStateDescription vmstate_acpi = {
>      .name = "piix4_pm",
> -    .version_id = 3,
> +    .version_id = 4,
>      .minimum_version_id = 3,
>      .minimum_version_id_old = 1,
>      .load_state_old = acpi_load_old,
> @@ -281,6 +305,7 @@ static const VMStateDescription vmstate_acpi = {
>          VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe, ACPIGPE),
>          VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2, vmstate_pci_status,
>                         struct pci_status),
> +        VMSTATE_CPU_STATUS_ARRAY(gpe_cpu.sts, PIIX4PMState),

What is more, I think that simply:

VMSTATE_UINT8_ARRAY_V(gpe_cpu.sts, PIIX4PMState, PIIX4_PROC_LEN, 4);

Should do the trick without the previous blob (it was needed for the old
version because we have a uint32 data but we send a uint16 one).

Could you check?  I don't have an easy way to test that it "receives"
the right value.

Later,  Juan.

>          VMSTATE_END_OF_LIST()
>      }
>  };
> @@ -585,6 +610,85 @@ static const MemoryRegionOps piix4_pci_ops = {
Igor Mammedov - April 23, 2013, 12:54 p.m.
On Tue, 23 Apr 2013 13:38:10 +0200
Juan Quintela <quintela@redhat.com> wrote:

> Igor Mammedov <imammedo@redhat.com> wrote:
> 
> >  
> > +#define VMSTATE_CPU_STATUS_ARRAY(_field,
> > _state)                             \
> > +
> > {                                                                           \
> > +     .name       =
> > (stringify(_field)),                                      \
> > +     .version_id = 0,
> > \
> 
> this line should be:
>    .version_id = 4, 
> 
> 
> > +     .num        =
> > PIIX4_PROC_LEN,                                           \
> > +     .info       =
> > &vmstate_info_uint8,                                      \
> > +     .size       =
> > sizeof(uint8_t),                                          \
> > +     .flags      =
> > VMS_ARRAY,                                                \
> > +     .offset     = vmstate_offset_array(_state, _field,
> > uint8_t,             \
> > +
> > PIIX4_PROC_LEN),                     \
> > + }
> > +
> >  /* qemu-kvm 1.2 uses version 3 but advertised as 2
> >   * To support incoming qemu-kvm 1.2 migration, change version_id
> >   * and minimum_version_id to 2 below (which breaks migration from
> 
> > @@ -265,7 +289,7 @@ static int acpi_load_old(QEMUFile *f, void *opaque,
> > int version_id) */
> >  static const VMStateDescription vmstate_acpi = {
> >      .name = "piix4_pm",
> > -    .version_id = 3,
> > +    .version_id = 4,
> >      .minimum_version_id = 3,
> >      .minimum_version_id_old = 1,
> >      .load_state_old = acpi_load_old,
> > @@ -281,6 +305,7 @@ static const VMStateDescription vmstate_acpi = {
> >          VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe, ACPIGPE),
> >          VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2, vmstate_pci_status,
> >                         struct pci_status),
> > +        VMSTATE_CPU_STATUS_ARRAY(gpe_cpu.sts, PIIX4PMState),
> 
> It is more,  I think that simply:
> 
> VMSTATE_UINT8_ARRAY_V(gpu_cpu.sts,  PIIX4PMstate, PIIX4_PROC_LEN,  4);
> 
> Should do the trick without the previous blob (it was needed for the old
> version because we have a uint32 data but we send a uint16 one).
> 
> Could you check?  I don't have an easy way to test that it "receives"
> the right value.
Just checked, it works with VMSTATE_UINT8_ARRAY_V as well,

but I have a question: why should the version be 4? Looking at git history,
components of vmstate_acpi don't advance their version each time
vmstate_acpi changes; they do it only when they themselves change.

> 
> Later,  Juan.
> 
> >          VMSTATE_END_OF_LIST()
> >      }
> >  };
> > @@ -585,6 +610,85 @@ static const MemoryRegionOps piix4_pci_ops = {
>
Michael S. Tsirkin - April 23, 2013, 1:04 p.m.
On Tue, Apr 23, 2013 at 02:54:16PM +0200, Igor Mammedov wrote:
> On Tue, 23 Apr 2013 13:38:10 +0200
> Juan Quintela <quintela@redhat.com> wrote:
> 
> > Igor Mammedov <imammedo@redhat.com> wrote:
> > 
> > >  
> > > +#define VMSTATE_CPU_STATUS_ARRAY(_field,
> > > _state)                             \
> > > +
> > > {                                                                           \
> > > +     .name       =
> > > (stringify(_field)),                                      \
> > > +     .version_id = 0,
> > > \
> > 
> > this line should be:
> >    .version_id = 4, 
> > 
> > 
> > > +     .num        =
> > > PIIX4_PROC_LEN,                                           \
> > > +     .info       =
> > > &vmstate_info_uint8,                                      \
> > > +     .size       =
> > > sizeof(uint8_t),                                          \
> > > +     .flags      =
> > > VMS_ARRAY,                                                \
> > > +     .offset     = vmstate_offset_array(_state, _field,
> > > uint8_t,             \
> > > +
> > > PIIX4_PROC_LEN),                     \
> > > + }
> > > +
> > >  /* qemu-kvm 1.2 uses version 3 but advertised as 2
> > >   * To support incoming qemu-kvm 1.2 migration, change version_id
> > >   * and minimum_version_id to 2 below (which breaks migration from
> > 
> > > @@ -265,7 +289,7 @@ static int acpi_load_old(QEMUFile *f, void *opaque,
> > > int version_id) */
> > >  static const VMStateDescription vmstate_acpi = {
> > >      .name = "piix4_pm",
> > > -    .version_id = 3,
> > > +    .version_id = 4,
> > >      .minimum_version_id = 3,
> > >      .minimum_version_id_old = 1,
> > >      .load_state_old = acpi_load_old,
> > > @@ -281,6 +305,7 @@ static const VMStateDescription vmstate_acpi = {
> > >          VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe, ACPIGPE),
> > >          VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2, vmstate_pci_status,
> > >                         struct pci_status),
> > > +        VMSTATE_CPU_STATUS_ARRAY(gpe_cpu.sts, PIIX4PMState),
> > 
> > It is more,  I think that simply:
> > 
> > VMSTATE_UINT8_ARRAY_V(gpu_cpu.sts,  PIIX4PMstate, PIIX4_PROC_LEN,  4);
> > 
> > Should do the trick without the previous blob (it was needed for the old
> > version because we have a uint32 data but we send a uint16 one).
> > 
> > Could you check?  I don't have an easy way to test that it "receives"
> > the right value.
> Just checked, it works with VMSTATE_UINT8_ARRAY_V as well,
> 
> but I have a question, why version should be 4, looking at git history
> components of vmstate_acpi don't advance their version each time
> vmstate_acpi change, they do it only when they themselves change.

Generally changing version breaks cross version migration.
So please don't do it for an optional
feature like CPU hotplug.

> > 
> > Later,  Juan.
> > 
> > >          VMSTATE_END_OF_LIST()
> > >      }
> > >  };
> > > @@ -585,6 +610,85 @@ static const MemoryRegionOps piix4_pci_ops = {
> >
Juan Quintela - April 23, 2013, 1:16 p.m.
Igor Mammedov <imammedo@redhat.com> wrote:
> On Tue, 23 Apr 2013 13:38:10 +0200
> Juan Quintela <quintela@redhat.com> wrote:
>
>> Igor Mammedov <imammedo@redhat.com> wrote:
>> 
>> >  
>> > +#define VMSTATE_CPU_STATUS_ARRAY(_field,
>> > _state)                             \
>> > +
>> > {                                                                           \
>> > +     .name       =
>> > (stringify(_field)),                                      \
>> > +     .version_id = 0,
>> > \
>> 
>> this line should be:
>>    .version_id = 4, 
>> 
>> 
>> > +     .num        =
>> > PIIX4_PROC_LEN,                                           \
>> > +     .info       =
>> > &vmstate_info_uint8,                                      \
>> > +     .size       =
>> > sizeof(uint8_t),                                          \
>> > +     .flags      =
>> > VMS_ARRAY,                                                \
>> > +     .offset     = vmstate_offset_array(_state, _field,
>> > uint8_t,             \
>> > +
>> > PIIX4_PROC_LEN),                     \
>> > + }
>> > +
>> >  /* qemu-kvm 1.2 uses version 3 but advertised as 2
>> >   * To support incoming qemu-kvm 1.2 migration, change version_id
>> >   * and minimum_version_id to 2 below (which breaks migration from
>> 
>> > @@ -265,7 +289,7 @@ static int acpi_load_old(QEMUFile *f, void *opaque,
>> > int version_id) */
>> >  static const VMStateDescription vmstate_acpi = {
>> >      .name = "piix4_pm",
>> > -    .version_id = 3,
>> > +    .version_id = 4,
>> >      .minimum_version_id = 3,
>> >      .minimum_version_id_old = 1,
>> >      .load_state_old = acpi_load_old,
>> > @@ -281,6 +305,7 @@ static const VMStateDescription vmstate_acpi = {
>> >          VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe, ACPIGPE),
>> >          VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2, vmstate_pci_status,
>> >                         struct pci_status),
>> > +        VMSTATE_CPU_STATUS_ARRAY(gpe_cpu.sts, PIIX4PMState),
>> 
>> It is more,  I think that simply:
>> 
>> VMSTATE_UINT8_ARRAY_V(gpu_cpu.sts,  PIIX4PMstate, PIIX4_PROC_LEN,  4);
>> 
>> Should do the trick without the previous blob (it was needed for the old
>> version because we have a uint32 data but we send a uint16 one).
>> 
>> Could you check?  I don't have an easy way to test that it "receives"
>> the right value.
> Just checked, it works with VMSTATE_UINT8_ARRAY_V as well,
>
> but I have a question, why version should be 4, looking at git history
> components of vmstate_acpi don't advance their version each time
> vmstate_acpi change, they do it only when they themselves change.

I didn't know this was an optional field.  Is this only needed for cpu
hotplug,  or said otherwise,  when is it needed?  If so,  I can try to
put an optional section there.

Later,  Juan.


>
>> 
>> Later,  Juan.
>> 
>> >          VMSTATE_END_OF_LIST()
>> >      }
>> >  };
>> > @@ -585,6 +610,85 @@ static const MemoryRegionOps piix4_pci_ops = {
>>
Juan Quintela - April 23, 2013, 1:43 p.m.
Igor Mammedov <imammedo@redhat.com> wrote:
> * introduce processor status bitmask visible to guest at 0xaf00 addr,
>   where ACPI asl code expects it
> * set bit corresponding to APIC ID in processor status bitmask on
>   receiving CPU hot-plug notification
> * trigger CPU hot-plug SCI, to notify guest about CPU hot-plug event
>
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>

This is wrong (or at least superfluous)

> +static int piix4_init_cpu_status(Object *obj, void *opaque)
> +{
> +    struct cpu_status *g = (struct cpu_status *)opaque;
> +    Object *cpu_obj = object_dynamic_cast(obj, TYPE_CPU);
> +
> +    if (cpu_obj) {
> +        struct Error *error = NULL;

we set error to NULL

> +        CPUClass *k = CPU_GET_CLASS(cpu_obj);
> +        int64_t id = k->get_arch_id(CPU(cpu_obj));
> +
> +        if (error) {

and without touching error we test if it is != NULL.


something is missing here?

> +            fprintf(stderr, "failed to initilize CPU status for ACPI: %s\n",
> +                    error_get_pretty(error));
> +            error_free(error);
> +            abort();

Can't we return an error code at this point?  I guess no,  but asking
will not hurt.

> +        }
> +        g_assert((id / 8) < PIIX4_PROC_LEN);
> +        g->sts[id / 8] |= (1 << (id % 8));
> +    }
> +    return object_child_foreach(obj, piix4_init_cpu_status, opaque);
> +}
> +
>  static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
>                                  PCIHotplugState state);
>  
> @@ -600,6 +704,13 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
>      memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR,
>                                  &s->io_pci);
>      pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);
> +
> +    piix4_init_cpu_status(qdev_get_machine(), &s->gpe_cpu);
> +    memory_region_init_io(&s->io_cpu, &cpu_hotplug_ops, s, "apci-cpu-hotplug",
> +                          PIIX4_PROC_LEN);
> +    memory_region_add_subregion(parent, PIIX4_PROC_BASE, &s->io_cpu);
> +    s->cpu_added_notifier.notify = piix4_cpu_added_req;
> +    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
>  }
>  
>  static void enable_device(PIIX4PMState *s, int slot)
Eduardo Habkost - April 23, 2013, 1:58 p.m.
On Tue, Apr 23, 2013 at 03:43:51PM +0200, Juan Quintela wrote:
> Igor Mammedov <imammedo@redhat.com> wrote:
> > * introduce processor status bitmask visible to guest at 0xaf00 addr,
> >   where ACPI asl code expects it
> > * set bit corresponding to APIC ID in processor status bitmask on
> >   receiving CPU hot-plug notification
> > * trigger CPU hot-plug SCI, to notify guest about CPU hot-plug event
> >
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> 
> This is wrong (or at least supperfluous)
> 
> > +static int piix4_init_cpu_status(Object *obj, void *opaque)
> > +{
> > +    struct cpu_status *g = (struct cpu_status *)opaque;
> > +    Object *cpu_obj = object_dynamic_cast(obj, TYPE_CPU);
> > +
> > +    if (cpu_obj) {
> > +        struct Error *error = NULL;
> 
> we set error to NULL
> 
> > +        CPUClass *k = CPU_GET_CLASS(cpu_obj);
> > +        int64_t id = k->get_arch_id(CPU(cpu_obj));
> > +
> > +        if (error) {
> 
> and without touching error we test if it is != NULL.
> 
> 
> something is missing here?

Looks like a leftover from v1, that called object_property_get_int()
(with an error argument) instead of k->get_arch_id().
Igor Mammedov - April 23, 2013, 2:10 p.m.
On Tue, 23 Apr 2013 15:43:51 +0200
Juan Quintela <quintela@redhat.com> wrote:

> Igor Mammedov <imammedo@redhat.com> wrote:
> > * introduce processor status bitmask visible to guest at 0xaf00 addr,
> >   where ACPI asl code expects it
> > * set bit corresponding to APIC ID in processor status bitmask on
> >   receiving CPU hot-plug notification
> > * trigger CPU hot-plug SCI, to notify guest about CPU hot-plug event
> >
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> 
> This is wrong (or at least supperfluous)
> 
> > +static int piix4_init_cpu_status(Object *obj, void *opaque)
> > +{
> > +    struct cpu_status *g = (struct cpu_status *)opaque;
> > +    Object *cpu_obj = object_dynamic_cast(obj, TYPE_CPU);
> > +
> > +    if (cpu_obj) {
> > +        struct Error *error = NULL;
> 
> we set error to NULL
> 
> > +        CPUClass *k = CPU_GET_CLASS(cpu_obj);
> > +        int64_t id = k->get_arch_id(CPU(cpu_obj));
> > +
> > +        if (error) {
> 
> and without touching error we test if it is != NULL.
> 
> 
> something is missing here?

It's a leftover from previous versions; all error handling in this
function should be dropped, leaving only assert().

> 
> > +            fprintf(stderr, "failed to initilize CPU status for ACPI:
> > %s\n",
> > +                    error_get_pretty(error));
> > +            error_free(error);
> > +            abort();
> 
> Can't we return an error code at this point?  I guess no,  but asking
> will not hurt.
> 
> > +        }
> > +        g_assert((id / 8) < PIIX4_PROC_LEN);
> > +        g->sts[id / 8] |= (1 << (id % 8));
> > +    }
> > +    return object_child_foreach(obj, piix4_init_cpu_status, opaque);
> > +}
> > +
> >  static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
> >                                  PCIHotplugState state);
> >  
> > @@ -600,6 +704,13 @@ static void
> > piix4_acpi_system_hot_add_init(MemoryRegion *parent,
> > memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR, &s->io_pci);
> >      pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);
> > +
> > +    piix4_init_cpu_status(qdev_get_machine(), &s->gpe_cpu);
> > +    memory_region_init_io(&s->io_cpu, &cpu_hotplug_ops, s,
> > "apci-cpu-hotplug",
> > +                          PIIX4_PROC_LEN);
> > +    memory_region_add_subregion(parent, PIIX4_PROC_BASE, &s->io_cpu);
> > +    s->cpu_added_notifier.notify = piix4_cpu_added_req;
> > +    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
> >  }
> >  
> >  static void enable_device(PIIX4PMState *s, int slot)
>
Igor Mammedov - April 23, 2013, 2:51 p.m.
On Tue, 23 Apr 2013 16:04:22 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Tue, Apr 23, 2013 at 02:54:16PM +0200, Igor Mammedov wrote:
> > On Tue, 23 Apr 2013 13:38:10 +0200
> > Juan Quintela <quintela@redhat.com> wrote:
> > 
> > > Igor Mammedov <imammedo@redhat.com> wrote:
> > > 
> > > >  
> > > > +#define VMSTATE_CPU_STATUS_ARRAY(_field,
> > > > _state)                             \
> > > > +
> > > > {                                                                           \
> > > > +     .name       =
> > > > (stringify(_field)),                                      \
> > > > +     .version_id = 0,
> > > > \
> > > 
> > > this line should be:
> > >    .version_id = 4, 
> > > 
> > > 
> > > > +     .num        =
> > > > PIIX4_PROC_LEN,                                           \
> > > > +     .info       =
> > > > &vmstate_info_uint8,                                      \
> > > > +     .size       =
> > > > sizeof(uint8_t),                                          \
> > > > +     .flags      =
> > > > VMS_ARRAY,                                                \
> > > > +     .offset     = vmstate_offset_array(_state, _field,
> > > > uint8_t,             \
> > > > +
> > > > PIIX4_PROC_LEN),                     \
> > > > + }
> > > > +
> > > >  /* qemu-kvm 1.2 uses version 3 but advertised as 2
> > > >   * To support incoming qemu-kvm 1.2 migration, change version_id
> > > >   * and minimum_version_id to 2 below (which breaks migration from
> > > 
> > > > @@ -265,7 +289,7 @@ static int acpi_load_old(QEMUFile *f, void
> > > > *opaque, int version_id) */
> > > >  static const VMStateDescription vmstate_acpi = {
> > > >      .name = "piix4_pm",
> > > > -    .version_id = 3,
> > > > +    .version_id = 4,
> > > >      .minimum_version_id = 3,
> > > >      .minimum_version_id_old = 1,
> > > >      .load_state_old = acpi_load_old,
> > > > @@ -281,6 +305,7 @@ static const VMStateDescription vmstate_acpi = {
> > > >          VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe,
> > > > ACPIGPE), VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2,
> > > > vmstate_pci_status, struct pci_status),
> > > > +        VMSTATE_CPU_STATUS_ARRAY(gpe_cpu.sts, PIIX4PMState),
> > > 
> > > It is more,  I think that simply:
> > > 
> > > VMSTATE_UINT8_ARRAY_V(gpu_cpu.sts,  PIIX4PMstate, PIIX4_PROC_LEN,  4);
> > > 
> > > Should do the trick without the previous blob (it was needed for the old
> > > version because we have a uint32 data but we send a uint16 one).
> > > 
> > > Could you check?  I don't have an easy way to test that it "receives"
> > > the right value.
> > Just checked, it works with VMSTATE_UINT8_ARRAY_V as well,
> > 
> > but I have a question, why version should be 4, looking at git history
> > components of vmstate_acpi don't advance their version each time
> > vmstate_acpi change, they do it only when they themselves change.
> 
> Generally changing version breaks cross version migration.
> So please don't do it for an optional
> feature like CPU hotplug.
I probably wasn't clear enough; the question was why a new component
VMSTATE_UINT8_ARRAY_V(gpe_cpu.sts, ...) should be version 4 and not 0?

As for advancing vmstate_acpi version, it was requested:
http://lists.gnu.org/archive/html/qemu-devel/2013-03/msg04775.html



Anyway, while chatting on IRC with Juan, a question arose:
Do we really need to save/restore the gpe_cpu.sts field?

Since target has to be started with all CPUs (including hot-plugged), it will
have the same gpe_cpu.sts bitmap.

> > > 
> > > Later,  Juan.
> > > 
> > > >          VMSTATE_END_OF_LIST()
> > > >      }
> > > >  };
> > > > @@ -585,6 +610,85 @@ static const MemoryRegionOps piix4_pci_ops = {
> > > 
>
Michael S. Tsirkin - April 23, 2013, 3:01 p.m.
On Tue, Apr 23, 2013 at 04:51:36PM +0200, Igor Mammedov wrote:
> On Tue, 23 Apr 2013 16:04:22 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Tue, Apr 23, 2013 at 02:54:16PM +0200, Igor Mammedov wrote:
> > > On Tue, 23 Apr 2013 13:38:10 +0200
> > > Juan Quintela <quintela@redhat.com> wrote:
> > > 
> > > > Igor Mammedov <imammedo@redhat.com> wrote:
> > > > 
> > > > >  
> > > > > +#define VMSTATE_CPU_STATUS_ARRAY(_field,
> > > > > _state)                             \
> > > > > +
> > > > > {                                                                           \
> > > > > +     .name       =
> > > > > (stringify(_field)),                                      \
> > > > > +     .version_id = 0,
> > > > > \
> > > > 
> > > > this line should be:
> > > >    .version_id = 4, 
> > > > 
> > > > 
> > > > > +     .num        =
> > > > > PIIX4_PROC_LEN,                                           \
> > > > > +     .info       =
> > > > > &vmstate_info_uint8,                                      \
> > > > > +     .size       =
> > > > > sizeof(uint8_t),                                          \
> > > > > +     .flags      =
> > > > > VMS_ARRAY,                                                \
> > > > > +     .offset     = vmstate_offset_array(_state, _field,
> > > > > uint8_t,             \
> > > > > +
> > > > > PIIX4_PROC_LEN),                     \
> > > > > + }
> > > > > +
> > > > >  /* qemu-kvm 1.2 uses version 3 but advertised as 2
> > > > >   * To support incoming qemu-kvm 1.2 migration, change version_id
> > > > >   * and minimum_version_id to 2 below (which breaks migration from
> > > > 
> > > > > @@ -265,7 +289,7 @@ static int acpi_load_old(QEMUFile *f, void
> > > > > *opaque, int version_id) */
> > > > >  static const VMStateDescription vmstate_acpi = {
> > > > >      .name = "piix4_pm",
> > > > > -    .version_id = 3,
> > > > > +    .version_id = 4,
> > > > >      .minimum_version_id = 3,
> > > > >      .minimum_version_id_old = 1,
> > > > >      .load_state_old = acpi_load_old,
> > > > > @@ -281,6 +305,7 @@ static const VMStateDescription vmstate_acpi = {
> > > > >          VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe,
> > > > > ACPIGPE), VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2,
> > > > > vmstate_pci_status, struct pci_status),
> > > > > +        VMSTATE_CPU_STATUS_ARRAY(gpe_cpu.sts, PIIX4PMState),
> > > > 
> > > > It is more,  I think that simply:
> > > > 
> > > > VMSTATE_UINT8_ARRAY_V(gpu_cpu.sts,  PIIX4PMstate, PIIX4_PROC_LEN,  4);
> > > > 
> > > > Should do the trick without the previous blob (it was needed for the old
> > > > version because we have a uint32 data but we send a uint16 one).
> > > > 
> > > > Could you check?  I don't have an easy way to test that it "receives"
> > > > the right value.
> > > Just checked, it works with VMSTATE_UINT8_ARRAY_V as well,
> > > 
> > > but I have a question, why version should be 4, looking at git history
> > > components of vmstate_acpi don't advance their version each time
> > > vmstate_acpi change, they do it only when they themselves change.
> > 
> > Generally changing version breaks cross version migration.
> > So please don't do it for an optional
> > feature like CPU hotplug.
> I probably wasn't clear enough, question was about why a new component
> VMSTATE_UINT8_ARRAY_V(gpu_cpu.sts, ...) should be version 4 and not 0?
> 
> As for advancing vmstate_acpi version, it was requested:
> http://lists.gnu.org/archive/html/qemu-devel/2013-03/msg04775.html

It says
	You need to either bump the version, or add a subsection.   The
	subsection is not needed until the first hot-(un)plug action.
please add a subsection, don't bump the version :)

> 
> 
> Anyway why chatting on IRC with Juan, question arises:
> Do we really need to save/restore gpe_cpu.sts field?
> 
> Since target has to be started with all CPUs (including hot-plugged), it will
> have the same gpe_cpu.sts bitmap.
> 
> > > > 
> > > > Later,  Juan.
> > > > 
> > > > >          VMSTATE_END_OF_LIST()
> > > > >      }
> > > > >  };
> > > > > @@ -585,6 +610,85 @@ static const MemoryRegionOps piix4_pci_ops = {
> > > > 
> >
Andreas Färber - April 24, 2013, 4:06 p.m.
Am 24.04.2013 17:58, schrieb Igor Mammedov:
> * introduce processor status bitmask visible to guest at 0xaf00 addr,
>   where ACPI asl code expects it
> * set bit corresponding to APIC ID in processor status bitmask on
>   receiving CPU hot-plug notification
> * trigger CPU hot-plug SCI, to notify guest about CPU hot-plug event
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> Note:
>   gpe_cpu.sts isn't need to be migrated, since CPU hotpluging during
>   migration just doesn't work, since destination QEMU has to be started
>   with all present in guest CPUs (including hotplugged).
>   i.e. src-qemu -smp 2,max-cpus=4; cpu-add id=2; dst-qemu -smp 3,max-cpus=4
>   Destination QEMU will recreate the same gpe_cpu.sts=t'111' bitmap as
>   on source by calling qemu_for_each_cpu(piix4_init_cpu_status, &s->gpe_cpu);
>   since it has been started with 3 CPUs on command line.
> 
> v6:
>   * drop gpe_cpu.sts migration hunks
> v5:
>   * add optional vmstate subsection if there was CPU hotplug event
>   * remove unused Error*
>   * use qemu_for_each_cpu() instead of recursion over QOM tree
> v4:
>   * added spec for QEMU-Seabios interface
>   * added PIIX4_ prefix to PROC_ defines
> v3:
>   * s/get_firmware_id()/get_arch_id()/ due rebase
>   * s/cpu_add_notifier/cpu_added_notifier/
> v2:
>   * use CPUClass.get_firmware_id() to make code target independent
>   * bump up vmstate_acpi version
> ---
>  docs/specs/acpi_cpu_hotplug.txt |   22 +++++++++
>  hw/acpi/piix4.c                 |   90 ++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 110 insertions(+), 2 deletions(-)
>  create mode 100644 docs/specs/acpi_cpu_hotplug.txt
> 
[...]
> diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
> index 88386d7..b18202c 100644
> --- a/hw/acpi/piix4.c
> +++ b/hw/acpi/piix4.c
> @@ -48,19 +48,28 @@
>  #define PCI_EJ_BASE 0xae08
>  #define PCI_RMV_BASE 0xae0c
>  
> +#define PIIX4_PROC_BASE 0xaf00
> +#define PIIX4_PROC_LEN 32
> +
>  #define PIIX4_PCI_HOTPLUG_STATUS 2
> +#define PIIX4_CPU_HOTPLUG_STATUS 4
>  
>  struct pci_status {
>      uint32_t up; /* deprecated, maintained for migration compatibility */
>      uint32_t down;
>  };
>  
> +struct cpu_status {

I see that you're copying pci_status above, but Coding Style asks for
CamelCase and typedef, so could we change this to CPUStatus or better
PIIX4CPUStatus? (I counted 5 occurrences)

Andreas

> +    uint8_t sts[PIIX4_PROC_LEN];
> +};
> +
>  typedef struct PIIX4PMState {
>      PCIDevice dev;
>  
>      MemoryRegion io;
>      MemoryRegion io_gpe;
>      MemoryRegion io_pci;
> +    MemoryRegion io_cpu;
>      ACPIREGS ar;
>  
>      APMState apm;
> @@ -82,6 +91,9 @@ typedef struct PIIX4PMState {
>      uint8_t disable_s3;
>      uint8_t disable_s4;
>      uint8_t s4_val;
> +
> +    struct cpu_status gpe_cpu;
> +    Notifier cpu_added_notifier;
>  } PIIX4PMState;
>  
>  static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
> @@ -100,8 +112,8 @@ static void pm_update_sci(PIIX4PMState *s)
>                     ACPI_BITMASK_POWER_BUTTON_ENABLE |
>                     ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
>                     ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
> -        (((s->ar.gpe.sts[0] & s->ar.gpe.en[0])
> -          & PIIX4_PCI_HOTPLUG_STATUS) != 0);
> +        (((s->ar.gpe.sts[0] & s->ar.gpe.en[0]) &
> +          (PIIX4_PCI_HOTPLUG_STATUS | PIIX4_CPU_HOTPLUG_STATUS)) != 0);
>  
>      qemu_set_irq(s->irq, sci_level);
>      /* schedule a timer interruption if needed */
> @@ -585,6 +597,73 @@ static const MemoryRegionOps piix4_pci_ops = {
>      },
>  };
>  
> +static uint64_t cpu_status_read(void *opaque, hwaddr addr, unsigned width)
> +{
> +    PIIX4PMState *s = opaque;
> +    struct cpu_status *cpus = &s->gpe_cpu;
> +    uint64_t val = cpus->sts[addr];
> +
> +    return val;
> +}
> +
> +static void cpu_status_write(void *opaque, hwaddr addr, uint64_t data,
> +                             unsigned int size)
> +{
> +    /* TODO: implement VCPU removal on guest signal that CPU can be removed */
> +}
> +
> +static const MemoryRegionOps cpu_hotplug_ops = {
> +    .read = cpu_status_read,
> +    .write = cpu_status_write,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 1,
> +        .max_access_size = 1,
> +    },
> +};
> +
> +typedef enum {
> +    PLUG,
> +    UNPLUG,
> +} HotplugEventType;
> +
> +static void piix4_cpu_hotplug_req(PIIX4PMState *s, CPUState *cpu,
> +                                  HotplugEventType action)
> +{
> +    struct cpu_status *g = &s->gpe_cpu;
> +    ACPIGPE *gpe = &s->ar.gpe;
> +    CPUClass *k = CPU_GET_CLASS(cpu);
> +    int64_t cpu_id;
> +
> +    assert(s != NULL);
> +
> +    *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS;
> +    cpu_id = k->get_arch_id(CPU(cpu));
> +    if (action == PLUG) {
> +        g->sts[cpu_id / 8] |= (1 << (cpu_id % 8));
> +    } else {
> +        g->sts[cpu_id / 8] &= ~(1 << (cpu_id % 8));
> +    }
> +    pm_update_sci(s);
> +}
> +
> +static void piix4_cpu_added_req(Notifier *n, void *opaque)
> +{
> +    PIIX4PMState *s = container_of(n, PIIX4PMState, cpu_added_notifier);
> +
> +    piix4_cpu_hotplug_req(s, CPU(opaque), PLUG);
> +}
> +
> +static void piix4_init_cpu_status(CPUState *cpu, void *data)
> +{
> +    struct cpu_status *g = (struct cpu_status *)data;
> +    CPUClass *k = CPU_GET_CLASS(cpu);
> +    int64_t id = k->get_arch_id(cpu);
> +
> +    g_assert((id / 8) < PIIX4_PROC_LEN);
> +    g->sts[id / 8] |= (1 << (id % 8));
> +}
> +
>  static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
>                                  PCIHotplugState state);
>  
> @@ -600,6 +679,13 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
>      memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR,
>                                  &s->io_pci);
>      pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);
> +
> +    qemu_for_each_cpu(piix4_init_cpu_status, &s->gpe_cpu);
> +    memory_region_init_io(&s->io_cpu, &cpu_hotplug_ops, s, "apci-cpu-hotplug",
> +                          PIIX4_PROC_LEN);
> +    memory_region_add_subregion(parent, PIIX4_PROC_BASE, &s->io_cpu);
> +    s->cpu_added_notifier.notify = piix4_cpu_added_req;
> +    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
>  }
>  
>  static void enable_device(PIIX4PMState *s, int slot)
>
Igor Mammedov - April 24, 2013, 5:15 p.m.
On Wed, 24 Apr 2013 18:06:51 +0200
Andreas Färber <afaerber@suse.de> wrote:

> Am 24.04.2013 17:58, schrieb Igor Mammedov:
> > * introduce processor status bitmask visible to guest at 0xaf00 addr,
> >   where ACPI asl code expects it
> > * set bit corresponding to APIC ID in processor status bitmask on
> >   receiving CPU hot-plug notification
> > * trigger CPU hot-plug SCI, to notify guest about CPU hot-plug event
> > 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > Note:
> > >   gpe_cpu.sts doesn't need to be migrated, since CPU hotplugging during
> > >   migration just doesn't work: the destination QEMU has to be started
> > >   with all CPUs present in the guest (including hotplugged ones).
> >   i.e. src-qemu -smp 2,max-cpus=4; cpu-add id=2; dst-qemu -smp 3,max-cpus=4
> >   Destination QEMU will recreate the same gpe_cpu.sts=t'111' bitmap as
> >   on source by calling qemu_for_each_cpu(piix4_init_cpu_status, &s->gpe_cpu);
> >   since it has been started with 3 CPUs on command line.
> > 
> > v6:
> >   * drop gpe_cpu.sts migration hunks
> > v5:
> >   * add optional vmstate subsection if there was CPU hotplug event
> >   * remove unused Error*
> >   * use qemu_for_each_cpu() instead of recursion over QOM tree
> > v4:
> >   * added spec for QEMU-Seabios interface
> >   * added PIIX4_ prefix to PROC_ defines
> > v3:
> >   * s/get_firmware_id()/get_arch_id()/ due rebase
> >   * s/cpu_add_notifier/cpu_added_notifier/
> > v2:
> >   * use CPUClass.get_firmware_id() to make code target independent
> >   * bump up vmstate_acpi version
> > ---
> >  docs/specs/acpi_cpu_hotplug.txt |   22 +++++++++
> >  hw/acpi/piix4.c                 |   90 ++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 110 insertions(+), 2 deletions(-)
> >  create mode 100644 docs/specs/acpi_cpu_hotplug.txt
> > 
> [...]
> > diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
> > index 88386d7..b18202c 100644
> > --- a/hw/acpi/piix4.c
> > +++ b/hw/acpi/piix4.c
> > @@ -48,19 +48,28 @@
> >  #define PCI_EJ_BASE 0xae08
> >  #define PCI_RMV_BASE 0xae0c
> >  
> > +#define PIIX4_PROC_BASE 0xaf00
> > +#define PIIX4_PROC_LEN 32
> > +
> >  #define PIIX4_PCI_HOTPLUG_STATUS 2
> > +#define PIIX4_CPU_HOTPLUG_STATUS 4
> >  
> >  struct pci_status {
> >      uint32_t up; /* deprecated, maintained for migration compatibility */
> >      uint32_t down;
> >  };
> >  
> > +struct cpu_status {
> 
> I see that you're copying pci_status above, but Coding Style asks for
> CamelCase and typedef, so could we change this to CPUStatus or better
> PIIX4CPUStatus? (I counted 5 occurrences)
Sure, I'll resend it with the CPUStatus variant.

> Andreas
> 
> > +    uint8_t sts[PIIX4_PROC_LEN];
> > +};
> > +
> >  typedef struct PIIX4PMState {
> >      PCIDevice dev;
> >  
> >      MemoryRegion io;
> >      MemoryRegion io_gpe;
> >      MemoryRegion io_pci;
> > +    MemoryRegion io_cpu;
> >      ACPIREGS ar;
> >  
> >      APMState apm;
> > @@ -82,6 +91,9 @@ typedef struct PIIX4PMState {
> >      uint8_t disable_s3;
> >      uint8_t disable_s4;
> >      uint8_t s4_val;
> > +
> > +    struct cpu_status gpe_cpu;
> > +    Notifier cpu_added_notifier;
> >  } PIIX4PMState;
> >  
> >  static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
> > @@ -100,8 +112,8 @@ static void pm_update_sci(PIIX4PMState *s)
> >                     ACPI_BITMASK_POWER_BUTTON_ENABLE |
> >                     ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
> >                     ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
> > -        (((s->ar.gpe.sts[0] & s->ar.gpe.en[0])
> > -          & PIIX4_PCI_HOTPLUG_STATUS) != 0);
> > +        (((s->ar.gpe.sts[0] & s->ar.gpe.en[0]) &
> > +          (PIIX4_PCI_HOTPLUG_STATUS | PIIX4_CPU_HOTPLUG_STATUS)) != 0);
> >  
> >      qemu_set_irq(s->irq, sci_level);
> >      /* schedule a timer interruption if needed */
> > @@ -585,6 +597,73 @@ static const MemoryRegionOps piix4_pci_ops = {
> >      },
> >  };
> >  
> > +static uint64_t cpu_status_read(void *opaque, hwaddr addr, unsigned width)
> > +{
> > +    PIIX4PMState *s = opaque;
> > +    struct cpu_status *cpus = &s->gpe_cpu;
> > +    uint64_t val = cpus->sts[addr];
> > +
> > +    return val;
> > +}
> > +
> > +static void cpu_status_write(void *opaque, hwaddr addr, uint64_t data,
> > +                             unsigned int size)
> > +{
> > +    /* TODO: implement VCPU removal on guest signal that CPU can be removed */
> > +}
> > +
> > +static const MemoryRegionOps cpu_hotplug_ops = {
> > +    .read = cpu_status_read,
> > +    .write = cpu_status_write,
> > +    .endianness = DEVICE_LITTLE_ENDIAN,
> > +    .valid = {
> > +        .min_access_size = 1,
> > +        .max_access_size = 1,
> > +    },
> > +};
> > +
> > +typedef enum {
> > +    PLUG,
> > +    UNPLUG,
> > +} HotplugEventType;
> > +
> > +static void piix4_cpu_hotplug_req(PIIX4PMState *s, CPUState *cpu,
> > +                                  HotplugEventType action)
> > +{
> > +    struct cpu_status *g = &s->gpe_cpu;
> > +    ACPIGPE *gpe = &s->ar.gpe;
> > +    CPUClass *k = CPU_GET_CLASS(cpu);
> > +    int64_t cpu_id;
> > +
> > +    assert(s != NULL);
> > +
> > +    *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS;
> > +    cpu_id = k->get_arch_id(CPU(cpu));
> > +    if (action == PLUG) {
> > +        g->sts[cpu_id / 8] |= (1 << (cpu_id % 8));
> > +    } else {
> > +        g->sts[cpu_id / 8] &= ~(1 << (cpu_id % 8));
> > +    }
> > +    pm_update_sci(s);
> > +}
> > +
> > +static void piix4_cpu_added_req(Notifier *n, void *opaque)
> > +{
> > +    PIIX4PMState *s = container_of(n, PIIX4PMState, cpu_added_notifier);
> > +
> > +    piix4_cpu_hotplug_req(s, CPU(opaque), PLUG);
> > +}
> > +
> > +static void piix4_init_cpu_status(CPUState *cpu, void *data)
> > +{
> > +    struct cpu_status *g = (struct cpu_status *)data;
> > +    CPUClass *k = CPU_GET_CLASS(cpu);
> > +    int64_t id = k->get_arch_id(cpu);
> > +
> > +    g_assert((id / 8) < PIIX4_PROC_LEN);
> > +    g->sts[id / 8] |= (1 << (id % 8));
> > +}
> > +
> >  static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
> >                                  PCIHotplugState state);
> >  
> > @@ -600,6 +679,13 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
> >      memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR,
> >                                  &s->io_pci);
> >      pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);
> > +
> > +    qemu_for_each_cpu(piix4_init_cpu_status, &s->gpe_cpu);
> > +    memory_region_init_io(&s->io_cpu, &cpu_hotplug_ops, s, "apci-cpu-hotplug",
> > +                          PIIX4_PROC_LEN);
> > +    memory_region_add_subregion(parent, PIIX4_PROC_BASE, &s->io_cpu);
> > +    s->cpu_added_notifier.notify = piix4_cpu_added_req;
> > +    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
> >  }
> >  
> >  static void enable_device(PIIX4PMState *s, int slot)
> > 
> 
> 
> -- 
> SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
> GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
>

Patch

diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
new file mode 100644
index 0000000..5dec0c5
--- /dev/null
+++ b/docs/specs/acpi_cpu_hotplug.txt
@@ -0,0 +1,22 @@ 
+QEMU<->ACPI BIOS CPU hotplug interface
+--------------------------------------
+
+QEMU supports CPU hotplug via ACPI. This document
+describes the interface between QEMU and the ACPI BIOS.
+
+ACPI GPE block (IO ports 0xafe0-0xafe3, byte access):
+-----------------------------------------
+
+Generic ACPI GPE block. Bit 2 (GPE.2) is used to notify the ACPI BIOS
+of CPU hot-add/remove events via an SCI interrupt.
+
+CPU present bitmap (IO port 0xaf00-0xaf1f, 1-byte access):
+---------------------------------------------------------------
+One bit per CPU. Bit position reflects corresponding CPU APIC ID.
+Read-only.
+
+CPU hot-add/remove notification:
+-----------------------------------------------------
+QEMU sets/clears the corresponding CPU bit on a hot-add/remove event.
+The CPU present map is read by the ACPI BIOS GPE.2 handler to notify the
+OS of CPU hot-(un)plug events.
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 88386d7..b845123 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -48,19 +48,28 @@ 
 #define PCI_EJ_BASE 0xae08
 #define PCI_RMV_BASE 0xae0c
 
+#define PIIX4_PROC_BASE 0xaf00
+#define PIIX4_PROC_LEN 32
+
 #define PIIX4_PCI_HOTPLUG_STATUS 2
+#define PIIX4_CPU_HOTPLUG_STATUS 4
 
 struct pci_status {
     uint32_t up; /* deprecated, maintained for migration compatibility */
     uint32_t down;
 };
 
+struct cpu_status {
+    uint8_t sts[PIIX4_PROC_LEN];
+};
+
 typedef struct PIIX4PMState {
     PCIDevice dev;
 
     MemoryRegion io;
     MemoryRegion io_gpe;
     MemoryRegion io_pci;
+    MemoryRegion io_cpu;
     ACPIREGS ar;
 
     APMState apm;
@@ -82,6 +91,9 @@  typedef struct PIIX4PMState {
     uint8_t disable_s3;
     uint8_t disable_s4;
     uint8_t s4_val;
+
+    struct cpu_status gpe_cpu;
+    Notifier cpu_added_notifier;
 } PIIX4PMState;
 
 static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
@@ -100,8 +112,8 @@  static void pm_update_sci(PIIX4PMState *s)
                    ACPI_BITMASK_POWER_BUTTON_ENABLE |
                    ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
                    ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
-        (((s->ar.gpe.sts[0] & s->ar.gpe.en[0])
-          & PIIX4_PCI_HOTPLUG_STATUS) != 0);
+        (((s->ar.gpe.sts[0] & s->ar.gpe.en[0]) &
+          (PIIX4_PCI_HOTPLUG_STATUS | PIIX4_CPU_HOTPLUG_STATUS)) != 0);
 
     qemu_set_irq(s->irq, sci_level);
     /* schedule a timer interruption if needed */
@@ -257,6 +269,18 @@  static int acpi_load_old(QEMUFile *f, void *opaque, int version_id)
     return ret;
 }
 
+#define VMSTATE_CPU_STATUS_ARRAY(_field, _state)                             \
+ {                                                                           \
+     .name       = (stringify(_field)),                                      \
+     .version_id = 0,                                                        \
+     .num        = PIIX4_PROC_LEN,                                           \
+     .info       = &vmstate_info_uint8,                                      \
+     .size       = sizeof(uint8_t),                                          \
+     .flags      = VMS_ARRAY,                                                \
+     .offset     = vmstate_offset_array(_state, _field, uint8_t,             \
+                                        PIIX4_PROC_LEN),                     \
+ }
+
 /* qemu-kvm 1.2 uses version 3 but advertised as 2
  * To support incoming qemu-kvm 1.2 migration, change version_id
  * and minimum_version_id to 2 below (which breaks migration from
@@ -265,7 +289,7 @@  static int acpi_load_old(QEMUFile *f, void *opaque, int version_id)
  */
 static const VMStateDescription vmstate_acpi = {
     .name = "piix4_pm",
-    .version_id = 3,
+    .version_id = 4,
     .minimum_version_id = 3,
     .minimum_version_id_old = 1,
     .load_state_old = acpi_load_old,
@@ -281,6 +305,7 @@  static const VMStateDescription vmstate_acpi = {
         VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe, ACPIGPE),
         VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2, vmstate_pci_status,
                        struct pci_status),
+        VMSTATE_CPU_STATUS_ARRAY(gpe_cpu.sts, PIIX4PMState),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -585,6 +610,85 @@  static const MemoryRegionOps piix4_pci_ops = {
     },
 };
 
+static uint64_t cpu_status_read(void *opaque, hwaddr addr, unsigned width)
+{
+    PIIX4PMState *s = opaque;
+    struct cpu_status *cpus = &s->gpe_cpu;
+    uint64_t val = cpus->sts[addr];
+
+    return val;
+}
+
+static void cpu_status_write(void *opaque, hwaddr addr, uint64_t data,
+                             unsigned int size)
+{
+    /* TODO: implement VCPU removal on guest signal that CPU can be removed */
+}
+
+static const MemoryRegionOps cpu_hotplug_ops = {
+    .read = cpu_status_read,
+    .write = cpu_status_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+};
+
+typedef enum {
+    PLUG,
+    UNPLUG,
+} HotplugEventType;
+
+static void piix4_cpu_hotplug_req(PIIX4PMState *s, CPUState *cpu,
+                                  HotplugEventType action)
+{
+    struct cpu_status *g = &s->gpe_cpu;
+    ACPIGPE *gpe = &s->ar.gpe;
+    CPUClass *k = CPU_GET_CLASS(cpu);
+    int64_t cpu_id;
+
+    assert(s != NULL);
+
+    *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS;
+    cpu_id = k->get_arch_id(CPU(cpu));
+    if (action == PLUG) {
+        g->sts[cpu_id / 8] |= (1 << (cpu_id % 8));
+    } else {
+        g->sts[cpu_id / 8] &= ~(1 << (cpu_id % 8));
+    }
+    pm_update_sci(s);
+}
+
+static void piix4_cpu_added_req(Notifier *n, void *opaque)
+{
+    PIIX4PMState *s = container_of(n, PIIX4PMState, cpu_added_notifier);
+
+    piix4_cpu_hotplug_req(s, CPU(opaque), PLUG);
+}
+
+static int piix4_init_cpu_status(Object *obj, void *opaque)
+{
+    struct cpu_status *g = (struct cpu_status *)opaque;
+    Object *cpu_obj = object_dynamic_cast(obj, TYPE_CPU);
+
+    if (cpu_obj) {
+        struct Error *error = NULL;
+        CPUClass *k = CPU_GET_CLASS(cpu_obj);
+        int64_t id = k->get_arch_id(CPU(cpu_obj));
+
+        if (error) {
+            fprintf(stderr, "failed to initilize CPU status for ACPI: %s\n",
+                    error_get_pretty(error));
+            error_free(error);
+            abort();
+        }
+        g_assert((id / 8) < PIIX4_PROC_LEN);
+        g->sts[id / 8] |= (1 << (id % 8));
+    }
+    return object_child_foreach(obj, piix4_init_cpu_status, opaque);
+}
+
 static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
                                 PCIHotplugState state);
 
@@ -600,6 +704,13 @@  static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
     memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR,
                                 &s->io_pci);
     pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);
+
+    piix4_init_cpu_status(qdev_get_machine(), &s->gpe_cpu);
+    memory_region_init_io(&s->io_cpu, &cpu_hotplug_ops, s, "apci-cpu-hotplug",
+                          PIIX4_PROC_LEN);
+    memory_region_add_subregion(parent, PIIX4_PROC_BASE, &s->io_cpu);
+    s->cpu_added_notifier.notify = piix4_cpu_added_req;
+    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
 }
 
 static void enable_device(PIIX4PMState *s, int slot)