
[12/12] target-i386: implement CPU hot-add

Message ID 1363876125-8264-13-git-send-email-imammedo@redhat.com
State New

Commit Message

Igor Mammedov March 21, 2013, 2:28 p.m. UTC
... via a do_cpu_hot_add() hook called by the cpu_set QMP command,
for the x86 target.

* add an extra check that the APIC ID is in the allowed range
* return an error if a CPU with the requested APIC ID already exists, before
  creating a new instance (CPU removal is broken now; it will be fixed with
  CPU unplug)
* call the CPU add notifier as the last step of x86_cpu_realizefn() to
  update rtc_cmos and trigger the CPU hot-plug ACPI GPE to notify the guest.
  Doing it from x86_cpu_realizefn() will allow the same to happen once it
  becomes possible to add a CPU using device_add.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/i386/pc.c      |   22 ++++++++++++++++++++++
 target-i386/cpu.c |    1 +
 2 files changed, 23 insertions(+), 0 deletions(-)

Comments

Eric Blake March 22, 2013, 2:46 a.m. UTC | #1
On 03/21/2013 08:28 AM, Igor Mammedov wrote:
> ... via do_cpu_hot_add() hook called by cpu_set QMP command,
> for x86 target.
> 
> * add extra check that APIC ID is in allowed range

> +    if (x86_cpu_is_cpu_exist(qdev_get_machine(), &apic_id)) {
> +        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
> +                   ", it's already exists", apic_id);

s/it's/it/
Igor Mammedov March 25, 2013, 3:31 p.m. UTC | #2
On Thu, 21 Mar 2013 20:46:57 -0600
Eric Blake <eblake@redhat.com> wrote:

> On 03/21/2013 08:28 AM, Igor Mammedov wrote:
> > ... via do_cpu_hot_add() hook called by cpu_set QMP command,
> > for x86 target.
> > 
> > * add extra check that APIC ID is in allowed range
> 
> > +    if (x86_cpu_is_cpu_exist(qdev_get_machine(), &apic_id)) {
> > +        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
> > +                   ", it's already exists", apic_id);
> 
> s/it's/it/
> 

fixed on WIP branch:
https://github.com/imammedo/qemu/tree/cpu_set.WIP

Thanks!
Paolo Bonzini March 27, 2013, 11:19 a.m. UTC | #3
On 21/03/2013 15:28, Igor Mammedov wrote:
> ... via do_cpu_hot_add() hook called by cpu_set QMP command,
> for x86 target.
> 
> * add extra check that APIC ID is in allowed range
> * return error if CPU with requested APIC ID exists before creating
>   a new instance. (CPU removal is broken now, will be fixed with CPU unplug)
> * call CPU add notifier as the last step of x86_cpu_realizefn() to
>   update rtc_cmos and trigger CPU hot-plug ACPI GPE to notify guest.
>   Doing it from x86_cpu_realizefn() will allow to do the same when
>   it would be possible to add CPU using device_add.
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
>  hw/i386/pc.c      |   22 ++++++++++++++++++++++
>  target-i386/cpu.c |    1 +
>  2 files changed, 23 insertions(+), 0 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 7481f73..e3ba9ee 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -867,6 +867,19 @@ static void pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
>  {
>      X86CPU *cpu;
>  
> +    if (apic_id >= pc_apic_id_limit(max_cpus)) {
> +        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
> +                   ", max allowed ID: 0x%x", apic_id,
> +                   pc_apic_id_limit(max_cpus) - 1);
> +        return;
> +    }

This seems the wrong place to do this check.  It should be done in
do_cpu_hot_add, simply comparing against max_cpus.  Here, instead, you
should _assert_ that the APIC ID is in range.

> +    if (x86_cpu_is_cpu_exist(qdev_get_machine(), &apic_id)) {

Similarly, can this be done in qmp_cpu_set?  And should it really be an
error?  Onlining an already-online CPU is fine.

Again, here you could assert that the CPU is not a duplicate, instead.
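
For illustration, the layering could look roughly like this, assuming a
qmp_cpu_set() handler with a signature along these lines; both the signature
and the generic cpu_exists() helper discussed later in the thread are
hypothetical here, a sketch rather than the posted code:

  void qmp_cpu_set(int64_t id, bool online, Error **errp)
  {
      if (!online) {
          error_setg(errp, "CPU offlining is not implemented yet");
          return;
      }
      if (cpu_exists(id)) {
          /* onlining an already-present CPU is a no-op, not an error */
          return;
      }
      do_cpu_hot_add(id, errp);
  }

With the duplicate filtered out here, pc_new_cpu() could then simply assert
that the requested ID is new.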

> +        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
> +                   ", it's already exists", apic_id);
> +        return;
> +    }
>      cpu = cpu_x86_create(cpu_model, errp);
>      if (!cpu) {
>          return;
> @@ -882,6 +895,8 @@ static void pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
>      }
>  }
>  
> +static const char *saved_cpu_model;
> +
>  void pc_cpus_init(const char *cpu_model)

Instead of using this global, see the approach in the previous patch.

>  {
>      int i;
> @@ -895,6 +910,8 @@ void pc_cpus_init(const char *cpu_model)
>          cpu_model = "qemu32";
>  #endif
>      }
> +    saved_cpu_model = cpu_model;
> +

>      for (i = 0; i < smp_cpus; i++) {
>          pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), &error);
> @@ -906,6 +923,11 @@ void pc_cpus_init(const char *cpu_model)
>      }
>  }
>  
> +void do_cpu_hot_add(const int64_t id, Error **errp)
> +{
> +    pc_new_cpu(saved_cpu_model, id, errp);
> +}
> +

Missing x86_cpu_apic_id_from_index(id)?

>  void pc_acpi_init(const char *default_dsdt)
>  {
>      char *filename = NULL, *arg = NULL;
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index ae46f81..d127141 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2271,6 +2271,7 @@ out:
>      if (dev->hotplugged) {
>          cpu_synchronize_post_init(env);
>          resume_vcpu(CPU(cpu));
> +        qemu_system_cpu_hotplug_request(env->cpuid_apic_id);

As mentioned earlier, this notifier should be invoked at the CPU level,
not X86CPU.

Paolo

>      }
>  }
>  
>
Igor Mammedov April 3, 2013, 5:58 p.m. UTC | #4
On Wed, 27 Mar 2013 12:19:01 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 21/03/2013 15:28, Igor Mammedov ha scritto:
> > ... via do_cpu_hot_add() hook called by cpu_set QMP command,
> > for x86 target.
> > 
> > * add extra check that APIC ID is in allowed range
> > * return error if CPU with requested APIC ID exists before creating
> >   a new instance. (CPU removal is broken now, will be fixed with CPU unplug)
> > * call CPU add notifier as the last step of x86_cpu_realizefn() to
> >   update rtc_cmos and trigger CPU hot-plug ACPI GPE to notify guest.
> >   Doing it from x86_cpu_realizefn() will allow to do the same when
> >   it would be possible to add CPU using device_add.
> > 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> >  hw/i386/pc.c      |   22 ++++++++++++++++++++++
> >  target-i386/cpu.c |    1 +
> >  2 files changed, 23 insertions(+), 0 deletions(-)
> > 
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 7481f73..e3ba9ee 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -867,6 +867,19 @@ static void pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
> >  {
> >      X86CPU *cpu;
> >  
> > +    if (apic_id >= pc_apic_id_limit(max_cpus)) {
> > +        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
> > +                   ", max allowed ID: 0x%x", apic_id,
> > +                   pc_apic_id_limit(max_cpus) - 1);
> > +        return;
> > +    }
> 
> This seems the wrong place to do this check.  It should be done in
> do_cpu_hot_add, simply comparing against max_cpus.  Here, instead, you
> should _assert_ that the APIC ID is in range.
I'll move it there, but I'd keep pc_apic_id_limit(max_cpus), since the ID it
is compared against is an APIC ID.
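
For illustration only, the result would look roughly like this (error text
kept from the patch; a sketch, not the final code):

  void do_cpu_hot_add(const int64_t apic_id, Error **errp)
  {
      if (apic_id >= pc_apic_id_limit(max_cpus)) {
          error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
                     ", max allowed ID: 0x%x", apic_id,
                     pc_apic_id_limit(max_cpus) - 1);
          return;
      }
      pc_new_cpu(saved_cpu_model, apic_id, errp);
  }

  static void pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
  {
      X86CPU *cpu;

      /* callers have already validated the ID, so only assert here */
      assert(apic_id < pc_apic_id_limit(max_cpus));

      cpu = cpu_x86_create(cpu_model, errp);
      if (!cpu) {
          return;
      }
      /* ... rest unchanged from the patch ... */
  }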

> 
> > +    if (x86_cpu_is_cpu_exist(qdev_get_machine(), &apic_id)) {
> 
> Similarly, can this be done in qmp_cpu_set?  And should it really be an
I've tried to abstract the QMP part from target/implementation details.
We could introduce a global function like this and then use it from the QMP
subsystem:
  bool is_cpu_exist(int64_t id) {
     ... iterate over CPUs ...
     if (cpu->get_firmware_id() == id)
         return true;
     ...
  }

Andreas, is it acceptable if I add it to qom/cpu.h and qom/cpu.c?

> error?  Onlining an already-online CPU is fine.
The CPU is not just onlined, though: it is created, and it wouldn't be right
to create the same CPU twice. Hence the error here, so the user knows they
are trying to add the same device.

> 
> Again, here you could assert that the CPU is not a duplicate, instead.
sure

> 
> > +        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
> > +                   ", it's already exists", apic_id);
> > +        return;
> > +    }
> >      cpu = cpu_x86_create(cpu_model, errp);
> >      if (!cpu) {
> >          return;
> > @@ -882,6 +895,8 @@ static void pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
> >      }
> >  }
> >  
> > +static const char *saved_cpu_model;
> > +
> >  void pc_cpus_init(const char *cpu_model)
> 
> Instead of using this global, see the approach in the previous patch.
> 
> >  {
> >      int i;
> > @@ -895,6 +910,8 @@ void pc_cpus_init(const char *cpu_model)
> >          cpu_model = "qemu32";
> >  #endif
> >      }
> > +    saved_cpu_model = cpu_model;
> > +
> 
> >      for (i = 0; i < smp_cpus; i++) {
> >          pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), &error);
> > @@ -906,6 +923,11 @@ void pc_cpus_init(const char *cpu_model)
> >      }
> >  }
> >  
> > +void do_cpu_hot_add(const int64_t id, Error **errp)
> > +{
> > +    pc_new_cpu(saved_cpu_model, id, errp);
> > +}
> > +
> 
> Missing x86_cpu_apic_id_from_index(id)?
There was (is?) opposition to using cpu_index to identify an x86 CPU.
So management is expected to provide an APIC ID instead of a cpu_index.
That could be useful for making hotplug to a specific NUMA node/CPU work in
the future, though an interface for discovering the possible APIC IDs is not
part of this series.

> 
> >  void pc_acpi_init(const char *default_dsdt)
> >  {
> >      char *filename = NULL, *arg = NULL;
> > diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> > index ae46f81..d127141 100644
> > --- a/target-i386/cpu.c
> > +++ b/target-i386/cpu.c
> > @@ -2271,6 +2271,7 @@ out:
> >      if (dev->hotplugged) {
> >          cpu_synchronize_post_init(env);
> >          resume_vcpu(CPU(cpu));
> > +        qemu_system_cpu_hotplug_request(env->cpuid_apic_id);
> 
> As mentioned earlier, this notifier should be invoked at the CPU level,
> not X86CPU.
done

> Paolo
> 
> >      }
> >  }
> >  
> > 
> 
>
Eduardo Habkost April 3, 2013, 6:10 p.m. UTC | #5
On Wed, Apr 03, 2013 at 07:58:00PM +0200, Igor Mammedov wrote:
<snip>
> > > +void do_cpu_hot_add(const int64_t id, Error **errp)
> > > +{
> > > +    pc_new_cpu(saved_cpu_model, id, errp);
> > > +}
> > > +
> > 
> > Missing x86_cpu_apic_id_from_index(id)?
> There was(is?) opposition to using cpu_index to identify x86 CPU.

Really? Do you have a pointer to the discussion?


> So, it is expected from management to provide APIC ID instead of cpu_index.
> It could be useful to make hotplug to a specific NUMA node/cpu to work in
> future.
> Though interface of possible APIC IDs discovery is not part of this series.

That's exactly the opposite of what I expect. The APIC ID is an internal
implementation detail, and external tools must _not_ be required to deal
with it and to calculate it.

Communication with the BIOS, on the other hand, is entirely based on the
APIC ID, and not CPU indexes. So QEMU needs to translate the CPU indexes
(used to communicate with the outside world) to APIC IDs when talking to
the BIOS.
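
A minimal sketch of that translation at the hot-add entry point, assuming the
id passed over QMP is a plain CPU index (saved_cpu_model and do_cpu_hot_add()
as in the patch; the error text is illustrative):

  void do_cpu_hot_add(const int64_t id, Error **errp)
  {
      if (id < 0 || id >= max_cpus) {
          error_setg(errp, "CPU index %" PRId64 " out of range (max %d)",
                     id, max_cpus - 1);
          return;
      }
      /* QEMU, not the caller, derives the topology-dependent APIC ID */
      pc_new_cpu(saved_cpu_model, x86_cpu_apic_id_from_index(id), errp);
  }
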
Andreas Färber April 3, 2013, 6:22 p.m. UTC | #6
On 03.04.2013 19:58, Igor Mammedov wrote:
> On Wed, 27 Mar 2013 12:19:01 +0100
> Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
>> Il 21/03/2013 15:28, Igor Mammedov ha scritto:
>>> +    if (x86_cpu_is_cpu_exist(qdev_get_machine(), &apic_id)) {
>>
>> Similarly, can this be done in qmp_cpu_set?  And should it really be an
> I've tried to abstract qmp part from target/implementation details.
> We could introduce global func like, and then use it if from QMP subsystem:
>   bool is_cpu_exist(int64_t id) {
>      ... iterate over CPUs ...
>      if (cpu->get_firmware_id() == id)
>          return true;
>      ...
>   }
> 
> Andreas, Is it acceptable if I add it to qom/cpu.h,

If you make it cpu_exists() then sure,

> qom/cpu.c ?

but this is not going to work yet; you may need to place it in cpus.c
due to first_cpu and next_cpu operating on CPUArchState.

Andreas
Igor Mammedov April 3, 2013, 6:59 p.m. UTC | #7
On Wed, 3 Apr 2013 15:10:05 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Wed, Apr 03, 2013 at 07:58:00PM +0200, Igor Mammedov wrote:
> <snip>
> > > > +void do_cpu_hot_add(const int64_t id, Error **errp)
> > > > +{
> > > > +    pc_new_cpu(saved_cpu_model, id, errp);
> > > > +}
> > > > +
> > > 
> > > Missing x86_cpu_apic_id_from_index(id)?
> > There was(is?) opposition to using cpu_index to identify x86 CPU.
> 
> Really? Do you have a pointer to the discussion?
Here is what I could find in my mailbox:
http://lists.gnu.org/archive/html/qemu-devel/2012-05/msg02770.html
Jan can correct me if I'm wrong.

> 
> 
> > So, it is expected from management to provide APIC ID instead of cpu_index.
> > It could be useful to make hotplug to a specific NUMA node/cpu to work in
> > future.
> > Though interface of possible APIC IDs discovery is not part of this series.
> 
> That's exactly the opposite of what I expect. The APIC ID is an internal
> implementation detail, and external tools must _not_ be required to deal
> with it and to calculate it.
> 
> Communication with the BIOS, on the other hand, is entirely based on the
> APIC ID, and not CPU indexes. So QEMU needs to translate the CPU indexes
> (used to communicate with the outside world) to APIC IDs when talking to
> the BIOS.
cpu_index won't work nicely with hot-adding a CPU to a specific NUMA node,
though. With an APIC ID (which management might treat as opaque) we could
expose something like

/machine/icc-bridge/link<CPU[apic_id_n]>
...

for all possible CPUs, with empty links for the non-existing ones,

and later add something like this:

/machine/numa_node[x]/link<CPU[apic_id_n]>
...

Libvirt could then just pick up a ready APIC ID from the desired place and
add a CPU either using cpu-add id=xxx or device_add x86-cpu-...,apic_id=xxx.

One more point: cpu_index is a QEMU implementation detail, and we could not
add a cpu-index property to the x86 CPU since the hardware doesn't have such
a feature, so it wouldn't be available with device_add.
 
> 
> -- 
> Eduardo
>
Igor Mammedov April 3, 2013, 7:01 p.m. UTC | #8
On Wed, 03 Apr 2013 20:22:17 +0200
Andreas Färber <afaerber@suse.de> wrote:

> Am 03.04.2013 19:58, schrieb Igor Mammedov:
> > On Wed, 27 Mar 2013 12:19:01 +0100
> > Paolo Bonzini <pbonzini@redhat.com> wrote:
> > 
> >> Il 21/03/2013 15:28, Igor Mammedov ha scritto:
> >>> +    if (x86_cpu_is_cpu_exist(qdev_get_machine(), &apic_id)) {
> >>
> >> Similarly, can this be done in qmp_cpu_set?  And should it really be an
> > I've tried to abstract qmp part from target/implementation details.
> > We could introduce global func like, and then use it if from QMP subsystem:
> >   bool is_cpu_exist(int64_t id) {
> >      ... iterate over CPUs ...
> >      if (cpu->get_firmware_id() == id)
> >          return true;
> >      ...
> >   }
> > 
> > Andreas, Is it acceptable if I add it to qom/cpu.h,
> 
> If you make it cpu_exists() then sure,
> 
> > qom/cpu.c ?
> 
> but this is not going to work yet, you may need to place it in cpus.c
> due to first_cpu and next_cpu operating on CPUArchState.

I actually do a recursive search of the QOM tree under /machine for TYPE_CPU,
avoiding the use of first_cpu and next_cpu, so it's abstracted from CPUArchState.
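
For illustration, such a helper could look roughly like this, assuming the
CPUClass::get_arch_id() hook used elsewhere in this series and QEMU's QOM
child-iteration API (header paths approximate; a sketch, not the posted code):

  #include "qom/object.h"
  #include "qom/cpu.h"            /* CPUState, CPUClass, TYPE_CPU */

  typedef struct CPUExistsArgs {
      int64_t id;                 /* firmware/arch ID we are looking for */
      bool found;
  } CPUExistsArgs;

  static int cpu_exists_child(Object *child, void *opaque)
  {
      CPUExistsArgs *args = opaque;
      CPUState *cpu = (CPUState *)object_dynamic_cast(child, TYPE_CPU);

      if (cpu) {
          CPUClass *cc = CPU_GET_CLASS(cpu);

          if (cc->get_arch_id(cpu) == args->id) {
              args->found = true;
              return 1;           /* non-zero stops the iteration */
          }
          return 0;
      }
      /* not a CPU: keep descending into this object's children */
      return object_child_foreach(child, cpu_exists_child, opaque);
  }

  bool cpu_exists(int64_t id)
  {
      CPUExistsArgs args = { .id = id, .found = false };

      object_child_foreach(OBJECT(qdev_get_machine()), cpu_exists_child, &args);
      return args.found;
  }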

> 
> Andreas
> 
> -- 
> SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
> GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
>
Eduardo Habkost April 3, 2013, 7:27 p.m. UTC | #9
On Wed, Apr 03, 2013 at 08:59:07PM +0200, Igor Mammedov wrote:
> On Wed, 3 Apr 2013 15:10:05 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > On Wed, Apr 03, 2013 at 07:58:00PM +0200, Igor Mammedov wrote:
> > <snip>
> > > > > +void do_cpu_hot_add(const int64_t id, Error **errp)
> > > > > +{
> > > > > +    pc_new_cpu(saved_cpu_model, id, errp);
> > > > > +}
> > > > > +
> > > > 
> > > > Missing x86_cpu_apic_id_from_index(id)?
> > > There was(is?) opposition to using cpu_index to identify x86 CPU.
> > 
> > Really? Do you have a pointer to the discussion?
> Here is what I could find in my mail box:
> http://lists.gnu.org/archive/html/qemu-devel/2012-05/msg02770.html
> Jan could correct me if I'm wrong.



So, quoting Jan:
> From my POV, cpu_index could become equal to the physical APIC ID.
> As long as we can set it freely (provided it remains unique) and
> non-continuously, we don't need separate indexes."

We can't choose APIC IDs freely, because the APIC ID is calculated based
on the CPU topology (socket + core + thread IDs).
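
For context, that dependence looks roughly like this; a self-contained sketch
that mirrors, in simplified form, the bit-field scheme QEMU uses for APIC IDs
(the topology numbers are only an example):

  #include <stdint.h>
  #include <stdio.h>

  /* bits needed to encode 'count' distinct IDs (0 if count <= 1) */
  static unsigned bits_for(unsigned count)
  {
      unsigned bits = 0;

      while ((1u << bits) < count) {
          bits++;
      }
      return bits;
  }

  /* pack socket/core/thread IDs into topology-sized bit fields */
  static uint32_t apicid_from_topo(unsigned nr_cores, unsigned nr_threads,
                                   unsigned pkg_id, unsigned core_id,
                                   unsigned smt_id)
  {
      unsigned smt_bits = bits_for(nr_threads);
      unsigned core_bits = bits_for(nr_cores);

      return (pkg_id << (core_bits + smt_bits)) | (core_id << smt_bits) | smt_id;
  }

  int main(void)
  {
      unsigned s, c, t;

      /* 2 sockets x 3 cores x 2 threads: the resulting APIC IDs are not
       * contiguous, because 3 cores need 2 bits (socket 1 starts at 8). */
      for (s = 0; s < 2; s++) {
          for (c = 0; c < 3; c++) {
              for (t = 0; t < 2; t++) {
                  printf("socket %u core %u thread %u -> APIC ID %u\n",
                         s, c, t, apicid_from_topo(3, 2, s, c, t));
              }
          }
      }
      return 0;
  }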

So, the cpu_index could be the same as the APIC ID if the cpu_index
value were declared opaque, being just a "CPU identifier" chosen
arbitrarily by QEMU (which happens to match the APIC ID). But we will
probably have some problems with this:

- The CPU indexes are currently allocated contiguously, and existing
  interfaces probably already assume that (e.g. the "-numa" option,
  "info numa", "info cpus" and maybe other monitor commands)
- QEMU must be responsible for calculating the APIC ID of each CPU,
  because it is based on the CPU topology.
- If QEMU is the one who calculates the APIC ID, what kind of identifier
  we can use for a CPU object in the command-line (e.g. in the "-numa"
  option)?
- We may need to redefine the meaning of the "maxcpus" -smp option, if
  all our interfaces are now based in non-contiguous and freely-set CPU
  identifiers.

In short, getting rid of the contiguous CPU indexes sounds very
difficult. We could introduce another kind of identifier, but we
probably need to keep the CPU indexes contiguous to keep existing
interfaces working.

> 
> > 
> > 
> > > So, it is expected from management to provide APIC ID instead of cpu_index.
> > > It could be useful to make hotplug to a specific NUMA node/cpu to work in
> > > future.
> > > Though interface of possible APIC IDs discovery is not part of this series.
> > 
> > That's exactly the opposite of what I expect. The APIC ID is an internal
> > implementation detail, and external tools must _not_ be required to deal
> > with it and to calculate it.
> > 
> > Communication with the BIOS, on the other hand, is entirely based on the
> > APIC ID, and not CPU indexes. So QEMU needs to translate the CPU indexes
> > (used to communicate with the outside world) to APIC IDs when talking to
> > the BIOS.
> cpu_index won't work nicely with hot-adding CPU to specific numa node though.

Well, the "-numa node" options are already based on CPU indexes, so it
would match it the existing NUMA configuration interface.

> with APIC ID (mgmt might treat it as opaque) we could expose something like
> 
> /machine/icc-bridge/link<CPU[apic_id_n]
> ...
> 
> for all possible CPUs, with empty links for non existing ones.
> 
> and later add on something like this:
> 
> /machine/numa_node[x]/link<CPU[apic_id_n]>
> ...
> 
> Libvirt than could just pickup ready apic id from desired place and add CPU
> either using cpu-add id=xxx or device_add x86-cpu-...,apic_id=xxx
> 
> +1 more cpu_index is QEMU implementation detail and we could not add to x86 CPU
> cpu-index property since hardware doesn't have such feature, so it won't be
> available with device_add.

I don't mind hiding cpu_index too. I don't mind if we use a cpu_index,
QOM links, arbitrary IDs set by the user. I just have a problem with
requiring libvirt to set the APIC ID.

If you give libvirt an easy way to convert a CPU "location" (index, numa
node, whatever) to an APIC ID that is pre-calculated by QEMU, then it
could work. But do we really need to require libvirt to deal with APIC
ID directly? If you just set the links properly to reflect the CPU
"location", the CPU could calculate its APIC ID based on its "location"
using the links.
Igor Mammedov April 3, 2013, 8:09 p.m. UTC | #10
On Wed, 3 Apr 2013 16:27:11 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Wed, Apr 03, 2013 at 08:59:07PM +0200, Igor Mammedov wrote:
> > On Wed, 3 Apr 2013 15:10:05 -0300
> > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > 
> > > On Wed, Apr 03, 2013 at 07:58:00PM +0200, Igor Mammedov wrote:
> > > <snip>
> > > > > > +void do_cpu_hot_add(const int64_t id, Error **errp)
> > > > > > +{
> > > > > > +    pc_new_cpu(saved_cpu_model, id, errp);
> > > > > > +}
> > > > > > +
> > > > > 
> > > > > Missing x86_cpu_apic_id_from_index(id)?
> > > > There was(is?) opposition to using cpu_index to identify x86 CPU.
> > > 
> > > Really? Do you have a pointer to the discussion?
> > Here is what I could find in my mail box:
> > http://lists.gnu.org/archive/html/qemu-devel/2012-05/msg02770.html
> > Jan could correct me if I'm wrong.
> 
> 
> 
> So, quoting Jan:
> > From my POV, cpu_index could become equal to the physical APIC ID.
> > As long as we can set it freely (provided it remains unique) and
> > non-continuously, we don't need separate indexes."
> 
> We can't choose APIC IDs freely, because the APIC ID is calculated based
> on the CPU topology (socket + core + thread IDs).
> 
> So, the cpu_index could be the same as the APIC ID if the cpu_index
> value declared opaque, being just a "CPU identifier" that is chosen by
> QEMU arbitrarily (and that happens to match the APIC ID). But we will
> probably have some problems with this:
> 
> - The CPU index are currently allocated contiguously, and probably
>   existing interfaces already assume that (e.g. the "-numa' option,
>   "info numa", "info cpus" and maybe other monitor commands)
> - QEMU must be responsible for calculating the APIC ID of each CPU,
>   because it is based on the CPU topology.
> - If QEMU is the one who calculates the APIC ID, what kind of identifier
>   we can use for a CPU object in the command-line (e.g. in the "-numa"
>   option)?
Using any kind of thread ID is problematic, since two threads from the same
core could end up on different nodes.
Maybe the placement interface could be better described as node[n]=sockets_list?
 
> - We may need to redefine the meaning of the "maxcpus" -smp option, if
>   all our interfaces are now based in non-contiguous and freely-set CPU
>   identifiers.
It's the number of CPUs available to the guest, which is pretty clear from the user's POV.
 
> 
> In short, getting rid of the contiguous CPU indexes sounds very
> difficult. We could introduce other kind of identifiers, but probably we
> may need to keep the CPU indexes contiguous to keep existing interfaces
> working.
Once we have CPU unplug, we will have non-contiguous cpu_index values, so
fixing cpu_index allocation/usage where necessary will be part of the CPU
unplug series.

> 
> > 
> > > 
> > > 
> > > > So, it is expected from management to provide APIC ID instead of cpu_index.
> > > > It could be useful to make hotplug to a specific NUMA node/cpu to work in
> > > > future.
> > > > Though interface of possible APIC IDs discovery is not part of this series.
> > > 
> > > That's exactly the opposite of what I expect. The APIC ID is an internal
> > > implementation detail, and external tools must _not_ be required to deal
> > > with it and to calculate it.
> > > 
> > > Communication with the BIOS, on the other hand, is entirely based on the
> > > APIC ID, and not CPU indexes. So QEMU needs to translate the CPU indexes
> > > (used to communicate with the outside world) to APIC IDs when talking to
> > > the BIOS.
> > cpu_index won't work nicely with hot-adding CPU to specific numa node though.
> 
> Well, the "-numa node" options are already based on CPU indexes, so it
> would match it the existing NUMA configuration interface.
> 
> > with APIC ID (mgmt might treat it as opaque) we could expose something like
> > 
> > /machine/icc-bridge/link<CPU[apic_id_n]
> > ...
> > 
> > for all possible CPUs, with empty links for non existing ones.
> > 
> > and later add on something like this:
> > 
> > /machine/numa_node[x]/link<CPU[apic_id_n]>
> > ...
> > 
> > Libvirt than could just pickup ready apic id from desired place and add CPU
> > either using cpu-add id=xxx or device_add x86-cpu-...,apic_id=xxx
> > 
> > +1 more cpu_index is QEMU implementation detail and we could not add to x86 CPU
> > cpu-index property since hardware doesn't have such feature, so it won't be
> > available with device_add.
> 
> I don't mind hiding cpu_index too. I don't mind if we use a cpu_index,
> QOM links, arbitrary IDs set by the user. I just have a problem with
> requiring libvirt to set the APIC ID.
> 
> If you give libvirt an easy way to convert a CPU "location" (index, numa
> node, whatever) to an APIC ID that is pre-calculated by QEMU, then it
> could work. But do we really need to require libvirt to deal with APIC
> ID directly? If you just set the links properly to reflect the CPU
> "location", the CPU could calculate its APIC ID based on its "location"
> using the links.
What about adding a CPU to a specific node, then? It would require an
interface for telling the CPU which node it should be plugged into (part of
the APIC ID, I guess).

> 
> -- 
> Eduardo
>
Eduardo Habkost April 3, 2013, 8:57 p.m. UTC | #11
On Wed, Apr 03, 2013 at 10:09:25PM +0200, Igor Mammedov wrote:
> On Wed, 3 Apr 2013 16:27:11 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > On Wed, Apr 03, 2013 at 08:59:07PM +0200, Igor Mammedov wrote:
> > > On Wed, 3 Apr 2013 15:10:05 -0300
> > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > 
> > > > On Wed, Apr 03, 2013 at 07:58:00PM +0200, Igor Mammedov wrote:
> > > > <snip>
> > > > > > > +void do_cpu_hot_add(const int64_t id, Error **errp)
> > > > > > > +{
> > > > > > > +    pc_new_cpu(saved_cpu_model, id, errp);
> > > > > > > +}
> > > > > > > +
> > > > > > 
> > > > > > Missing x86_cpu_apic_id_from_index(id)?
> > > > > There was(is?) opposition to using cpu_index to identify x86 CPU.
> > > > 
> > > > Really? Do you have a pointer to the discussion?
> > > Here is what I could find in my mail box:
> > > http://lists.gnu.org/archive/html/qemu-devel/2012-05/msg02770.html
> > > Jan could correct me if I'm wrong.
> > 
> > 
> > 
> > So, quoting Jan:
> > > From my POV, cpu_index could become equal to the physical APIC ID.
> > > As long as we can set it freely (provided it remains unique) and
> > > non-continuously, we don't need separate indexes."
> > 
> > We can't choose APIC IDs freely, because the APIC ID is calculated based
> > on the CPU topology (socket + core + thread IDs).
> > 
> > So, the cpu_index could be the same as the APIC ID if the cpu_index
> > value declared opaque, being just a "CPU identifier" that is chosen by
> > QEMU arbitrarily (and that happens to match the APIC ID). But we will
> > probably have some problems with this:
> > 
> > - The CPU index are currently allocated contiguously, and probably
> >   existing interfaces already assume that (e.g. the "-numa' option,
> >   "info numa", "info cpus" and maybe other monitor commands)
> > - QEMU must be responsible for calculating the APIC ID of each CPU,
> >   because it is based on the CPU topology.
> > - If QEMU is the one who calculates the APIC ID, what kind of identifier
> >   we can use for a CPU object in the command-line (e.g. in the "-numa"
> >   option)?
> using any kind of thread id is problematic since 2 treads from the same core
> could end-up on different nodes.
> Maybe placement interface could be better described as node[n]=sockets_list?

The problem here is compatibility: we need to keep existing
command lines working. And the existing interface (among other things)
doesn't prevent two threads of the same core from being in different
NUMA nodes.

If today "-smp 9,cores=3,threads=3 -numa node,cpus=0-4 -numa
node,cpus=5-8" has a specific meaning, we need to keep the same
meaning.

>  
> > - We may need to redefine the meaning of the "maxcpus" -smp option, if
> >   all our interfaces are now based in non-contiguous and freely-set CPU
> >   identifiers.
> it's amount of CPUs available to guest, pretty clear from user's POV.

I was just worried that there could be assumptions that "maxcpus is
always > cpu_index". But you are probably right.

>  
> > 
> > In short, getting rid of the contiguous CPU indexes sounds very
> > difficult. We could introduce other kind of identifiers, but probably we
> > may need to keep the CPU indexes contiguous to keep existing interfaces
> > working.
> Once we have CPU unplug, we will have non-contiguous cpu_index. So it will be
> part of CPU unplug series to fix cpu_index allocation/usage where necessary.

Keeping compatibility after CPU unplug is not a problem, as CPU unplug
doesn't exist yet. The problem here is having a reliable identifier
for CPUs that can be used on the command line. The only identifier we
have for that today is a contiguous CPU index, and if we make the
indexes non-contiguous we will break existing command lines that use
them (e.g. with "-numa").

> 
> > 
> > > 
> > > > 
> > > > 
> > > > > So, it is expected from management to provide APIC ID instead of cpu_index.
> > > > > It could be useful to make hotplug to a specific NUMA node/cpu to work in
> > > > > future.
> > > > > Though interface of possible APIC IDs discovery is not part of this series.
> > > > 
> > > > That's exactly the opposite of what I expect. The APIC ID is an internal
> > > > implementation detail, and external tools must _not_ be required to deal
> > > > with it and to calculate it.
> > > > 
> > > > Communication with the BIOS, on the other hand, is entirely based on the
> > > > APIC ID, and not CPU indexes. So QEMU needs to translate the CPU indexes
> > > > (used to communicate with the outside world) to APIC IDs when talking to
> > > > the BIOS.
> > > cpu_index won't work nicely with hot-adding CPU to specific numa node though.
> > 
> > Well, the "-numa node" options are already based on CPU indexes, so it
> > would match it the existing NUMA configuration interface.
> > 
> > > with APIC ID (mgmt might treat it as opaque) we could expose something like
> > > 
> > > /machine/icc-bridge/link<CPU[apic_id_n]
> > > ...
> > > 
> > > for all possible CPUs, with empty links for non existing ones.
> > > 
> > > and later add on something like this:
> > > 
> > > /machine/numa_node[x]/link<CPU[apic_id_n]>
> > > ...
> > > 
> > > Libvirt than could just pickup ready apic id from desired place and add CPU
> > > either using cpu-add id=xxx or device_add x86-cpu-...,apic_id=xxx
> > > 
> > > +1 more cpu_index is QEMU implementation detail and we could not add to x86 CPU
> > > cpu-index property since hardware doesn't have such feature, so it won't be
> > > available with device_add.
> > 
> > I don't mind hiding cpu_index too. I don't mind if we use a cpu_index,
> > QOM links, arbitrary IDs set by the user. I just have a problem with
> > requiring libvirt to set the APIC ID.
> > 
> > If you give libvirt an easy way to convert a CPU "location" (index, numa
> > node, whatever) to an APIC ID that is pre-calculated by QEMU, then it
> > could work. But do we really need to require libvirt to deal with APIC
> > ID directly? If you just set the links properly to reflect the CPU
> > "location", the CPU could calculate its APIC ID based on its "location"
> > using the links.
> What about adding CPU to a specific node then, it would require interface for
> communicating to CPU to which node it should be plugged (part of APIC ID, I
> guess).

Using QOM we could just use links. The question to me is how to identify
the CPU "location" reliably if we're going to use it in a "cpu-set"
interface. My point is that cpu_index works perfectly for that (as long
as the rules for how each CPU index maps to a NUMA node, socket, core,
and thread are well defined). Later we can have something not based on
CPU indexes, if we move to a 100% link-based QOM interface.

Having to ask QEMU for the APIC ID somehow and requiring the APIC ID to
be provided on the cpu-set command could work, yes. But it must not
require libvirt to calculate and choose the APIC IDs itself.

(Note that I didn't review all the code yet. Maybe you are already doing
all that.)

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7481f73..e3ba9ee 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -867,6 +867,19 @@  static void pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
 {
     X86CPU *cpu;
 
+    if (apic_id >= pc_apic_id_limit(max_cpus)) {
+        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
+                   ", max allowed ID: 0x%x", apic_id,
+                   pc_apic_id_limit(max_cpus) - 1);
+        return;
+    }
+
+    if (x86_cpu_is_cpu_exist(qdev_get_machine(), &apic_id)) {
+        error_setg(errp, "Unable to add CPU with ID: 0x%" PRIx64
+                   ", it's already exists", apic_id);
+        return;
+    }
+
     cpu = cpu_x86_create(cpu_model, errp);
     if (!cpu) {
         return;
@@ -882,6 +895,8 @@  static void pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
     }
 }
 
+static const char *saved_cpu_model;
+
 void pc_cpus_init(const char *cpu_model)
 {
     int i;
@@ -895,6 +910,8 @@  void pc_cpus_init(const char *cpu_model)
         cpu_model = "qemu32";
 #endif
     }
+    saved_cpu_model = cpu_model;
+
 
     for (i = 0; i < smp_cpus; i++) {
         pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), &error);
@@ -906,6 +923,11 @@  void pc_cpus_init(const char *cpu_model)
     }
 }
 
+void do_cpu_hot_add(const int64_t id, Error **errp)
+{
+    pc_new_cpu(saved_cpu_model, id, errp);
+}
+
 void pc_acpi_init(const char *default_dsdt)
 {
     char *filename = NULL, *arg = NULL;
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index ae46f81..d127141 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2271,6 +2271,7 @@  out:
     if (dev->hotplugged) {
         cpu_synchronize_post_init(env);
         resume_vcpu(CPU(cpu));
+        qemu_system_cpu_hotplug_request(env->cpuid_apic_id);
     }
 }