diff mbox

[18/19] target-i386: expose all possible CPUs as /machine/icc-bridge/cpu[0..N] links

Message ID 1365691918-30594-19-git-send-email-imammedo@redhat.com
State New
Headers show

Commit Message

Igor Mammedov April 11, 2013, 2:51 p.m. UTC
... and leave links for not present CPUs empty.

It will allow users to query for possible APIC IDs and use them
with cpu-add QMP command.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
v2:
 * s/get_firmware_id/get_arch_id/ due to rebase
 * rename cpu_add_notifier to cpu_added_notifier &
   icc_bridge_cpu_add_req -> icc_bridge_cpu_added_req
---
 hw/cpu/icc_bus.c          | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 hw/i386/pc.c              |  9 +++++++--
 include/hw/i386/icc_bus.h |  2 ++
 3 files changed, 55 insertions(+), 2 deletions(-)

Comments

Igor Mammedov April 12, 2013, 10:01 a.m. UTC | #1
On Thu, 11 Apr 2013 14:19:37 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Thu, Apr 11, 2013 at 04:51:57PM +0200, Igor Mammedov wrote:
> > ... and leave links for not present CPUs empty.
> > 
> > It will allow users to query for possible APIC IDs and use them
> > with cpu-add QMP command.
> > 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> 
> I don't see anything wrong with having icc-bridge links as well, but I
> would really like to have a target-independent namespace with links,
> that could be used to query for the available/valid CPU IDs for cpu-add
> commands instead of icc-bridge. The IDs on that namespace could be
> considered completely opaque.

Considering that -numa in its present state is not compatible with cpu-add
and that all CPU IDs in this case are the sequence [0..maxcpus-1], this
patch could be dropped without any harm. libvirt could just use
numbers from this sequence, like it's doing with the current cpu_set,
without any ID discovery.

So, I've postponed the target-independent namespace until we have -numa
reworked; then we could have /machine/node/socket/cpu containers with links.
The problem that needs to be solved is the ownership of the links storage.
Who should allocate and own it? If the machine were already a QOM object,
I'd go with the machine, but it isn't yet.

> 
> > ---
> > v2:
> >  * s/get_firmware_id/get_arch_id/ due to rebase
> >  * rename cpu_add_notifier to cpu_added_notifier &
> >    icc_bridge_cpu_add_req -> icc_bridge_cpued_add_req
> > ---
> >  hw/cpu/icc_bus.c          | 46 ++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/i386/pc.c              |  9 +++++++--
> >  include/hw/i386/icc_bus.h |  2 ++
> >  3 files changed, 55 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c
> > index ab9623d..5c0b9d4 100644
> > --- a/hw/cpu/icc_bus.c
> > +++ b/hw/cpu/icc_bus.c
> > @@ -18,6 +18,7 @@
> >   */
> >  #include "hw/i386/icc_bus.h"
> >  #include "hw/sysbus.h"
> > +#include "sysemu/sysemu.h"
> >  
> >  static void icc_bus_initfn(Object *obj)
> >  {
> > @@ -61,15 +62,39 @@ typedef struct ICCBridgeState {
> >      SysBusDevice busdev;
> >      MemoryRegion apic_container;
> >      MemoryRegion ioapic_container;
> > +    Notifier cpu_added_notifier;
> > +    Object **links;
> >  } ICCBridgeState;
> >  #define ICC_BRIGDE(obj) OBJECT_CHECK(ICCBridgeState, (obj), TYPE_ICC_BRIDGE)
> >  
> >  
> > +void icc_bridge_set_cpu_link(Object *bridge, Object *cpu_obj)
> > +{
> > +    gchar *name;
> > +    Error *error = NULL;
> > +    CPUState *cpu = CPU(cpu_obj);
> > +    int64_t id = CPU_GET_CLASS(cpu)->get_arch_id(cpu);
> > +
> > +    name = g_strdup_printf("cpu[%" PRIu32 "]", x86_cpu_apic_id_from_index(id));
> > +    object_property_set_link(bridge, cpu_obj, name, &error);
> > +    g_free(name);
> > +
> > +    g_assert(error == NULL);
> > +}
> > +
> > +static void icc_bridge_cpu_added_req(Notifier *n, void *opaque)
> > +{
> > +    ICCBridgeState *s = container_of(n, ICCBridgeState, cpu_added_notifier);
> > +
> > +    icc_bridge_set_cpu_link(OBJECT(s), OBJECT(opaque));
> > +}
> > +
> >  static void icc_bridge_initfn(Object *obj)
> >  {
> >      ICCBridgeState *s = ICC_BRIGDE(obj);
> >      SysBusDevice *sb = SYS_BUS_DEVICE(obj);
> >      ICCBus *ibus;
> > +    int i;
> >  
> >      ibus = ICC_BUS(qbus_create(TYPE_ICC_BUS, DEVICE(obj), "icc-bus"));
> >  
> > @@ -85,12 +110,33 @@ static void icc_bridge_initfn(Object *obj)
> >      memory_region_init(&s->ioapic_container, "icc-ioapic-container", 0x1000);
> >      sysbus_init_mmio(sb, &s->ioapic_container);
> >      ibus->ioapic_address_space = &s->ioapic_container;
> > +
> > +    s->links = g_malloc0(sizeof(Object *) * max_cpus);
> > +    for (i = 0; i < max_cpus; i++) {
> > +        gchar *cpu_name;
> > +
> > +        cpu_name = g_strdup_printf("cpu[%" PRIu32 "]",
> > +                                   x86_cpu_apic_id_from_index(i));
> > +        object_property_add_link(obj, cpu_name, TYPE_CPU, &s->links[i], NULL);
> > +        g_free(cpu_name);
> > +    }
> > +
> > +    s->cpu_added_notifier.notify = icc_bridge_cpu_added_req;
> > +    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
> > +}
> > +
> > +static void icc_bridge_fini(Object *obj)
> > +{
> > +    ICCBridgeState *s = ICC_BRIGDE(obj);
> > +
> > +    g_free(s->links);
> >  }
> >  
> >  static const TypeInfo icc_bridge_info = {
> >      .name  = "icc-bridge",
> >      .parent = TYPE_SYS_BUS_DEVICE,
> >      .instance_init  = icc_bridge_initfn,
> > +    .instance_finalize  = icc_bridge_fini,
> >      .instance_size  = sizeof(ICCBridgeState),
> >  };
> >  
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 6d5e164..ada235c 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -870,7 +870,8 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
> >      }
> >  }
> >  
> > -static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
> > +static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
> > +                          SysBusDevice *icc_bridge, Error **errp)
> >  {
> >      X86CPU *cpu;
> >  
> > @@ -882,6 +883,10 @@ static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
> >      object_property_set_int(OBJECT(cpu), apic_id, "apic-id", errp);
> >      object_property_set_bool(OBJECT(cpu), true, "realized", errp);
> >  
> > +    if (icc_bridge != NULL) {
> > +        icc_bridge_set_cpu_link(OBJECT(icc_bridge), OBJECT(cpu));
> > +    }
> > +
> >      if (error_is_set(errp)) {
> >          if (cpu != NULL) {
> >              object_unref(OBJECT(cpu));
> > @@ -911,7 +916,7 @@ void pc_cpus_init(const char *cpu_model)
> >                                                   TYPE_ICC_BRIDGE, NULL));
> >  
> >      for (i = 0; i < smp_cpus; i++) {
> > -        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), &error);
> > +        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), ib, &error);
> >          if (error) {
> >              fprintf(stderr, "%s\n", error_get_pretty(error));
> >              error_free(error);
> > diff --git a/include/hw/i386/icc_bus.h b/include/hw/i386/icc_bus.h
> > index 69a0278..bc31cd9 100644
> > --- a/include/hw/i386/icc_bus.h
> > +++ b/include/hw/i386/icc_bus.h
> > @@ -49,5 +49,7 @@ typedef struct ICCDeviceClass {
> >  
> >  #define TYPE_ICC_BRIDGE "icc-bridge"
> >  
> > +void icc_bridge_set_cpu_link(Object *bridge, Object *cpu);
> > +
> >  #endif /* CONFIG_USER_ONLY */
> >  #endif
> > -- 
> > 1.8.2
> > 
> 
> -- 
> Eduardo
Eduardo Habkost April 12, 2013, 12:44 p.m. UTC | #2
On Fri, Apr 12, 2013 at 12:01:03PM +0200, Igor Mammedov wrote:
> On Thu, 11 Apr 2013 14:19:37 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > On Thu, Apr 11, 2013 at 04:51:57PM +0200, Igor Mammedov wrote:
> > > ... and leave links for not present CPUs empty.
> > > 
> > > It will allow users to query for possible APIC IDs and use them
> > > with cpu-add QMP command.
> > > 
> > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > 
> > I don't see anything wrong with having icc-bridge links as well, but I
> > would really like to have a target-independent namespace with links,
> > that could be used to query for the available/valid CPU IDs for cpu-add
> > commands instead of icc-bridge. The IDs on that namespace could be
> > considered completely opaque.
> 
> Considering that -numa in present state is not compatible with cpu-add
> and that all CPU ID in this case are are sequence [0..maxcpus-1], this
> patch could be dropped without any harm. libvirt could just use
> numbers from this sequence like it's doing with current cpu_set without
> any ID discovery. 

But it's not -numa that makes APIC ID probing necessary, it's
non-power-of-2 core/thread counts on -smp (that make APIC IDs not match
CPU indexes).

"Don't use CPU hotplug with -numa" is easy for users and libvirt to
understand, but "don't use CPU hotplug with non-power-of-2 cores/threads
counts" is harder to explain.


> 
> So, I've postponed target independent until we have -numa reworked,
> then we could have /machine/node/socket/cpu containers with links.
> The problem that needs to be solved, is the links storage ownership.
> Who should allocate and own it? If machine was QOM object already,
> I'd go with machine but it's not yet.

If we use CPU index as argument to cpu-add, we don't need to handle all
those problems right now, we don't need to expose an APIC ID discovery
interface, we make it work even with non-power-of-2 cores/threads
counts, and we make it work with -numa.

So, my big question is: why are we trying so hard to avoid using CPU
indexes as argument to cpu-add, if it's so much easier, and it is an
obvious solution that makes the interface target-independent without any
extra effort?

> 
> > 
> > > ---
> > > v2:
> > >  * s/get_firmware_id/get_arch_id/ due to rebase
> > >  * rename cpu_add_notifier to cpu_added_notifier &
> > >    icc_bridge_cpu_add_req -> icc_bridge_cpued_add_req
> > > ---
> > >  hw/cpu/icc_bus.c          | 46 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  hw/i386/pc.c              |  9 +++++++--
> > >  include/hw/i386/icc_bus.h |  2 ++
> > >  3 files changed, 55 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c
> > > index ab9623d..5c0b9d4 100644
> > > --- a/hw/cpu/icc_bus.c
> > > +++ b/hw/cpu/icc_bus.c
> > > @@ -18,6 +18,7 @@
> > >   */
> > >  #include "hw/i386/icc_bus.h"
> > >  #include "hw/sysbus.h"
> > > +#include "sysemu/sysemu.h"
> > >  
> > >  static void icc_bus_initfn(Object *obj)
> > >  {
> > > @@ -61,15 +62,39 @@ typedef struct ICCBridgeState {
> > >      SysBusDevice busdev;
> > >      MemoryRegion apic_container;
> > >      MemoryRegion ioapic_container;
> > > +    Notifier cpu_added_notifier;
> > > +    Object **links;
> > >  } ICCBridgeState;
> > >  #define ICC_BRIGDE(obj) OBJECT_CHECK(ICCBridgeState, (obj), TYPE_ICC_BRIDGE)
> > >  
> > >  
> > > +void icc_bridge_set_cpu_link(Object *bridge, Object *cpu_obj)
> > > +{
> > > +    gchar *name;
> > > +    Error *error = NULL;
> > > +    CPUState *cpu = CPU(cpu_obj);
> > > +    int64_t id = CPU_GET_CLASS(cpu)->get_arch_id(cpu);
> > > +
> > > +    name = g_strdup_printf("cpu[%" PRIu32 "]", x86_cpu_apic_id_from_index(id));
> > > +    object_property_set_link(bridge, cpu_obj, name, &error);
> > > +    g_free(name);
> > > +
> > > +    g_assert(error == NULL);
> > > +}
> > > +
> > > +static void icc_bridge_cpu_added_req(Notifier *n, void *opaque)
> > > +{
> > > +    ICCBridgeState *s = container_of(n, ICCBridgeState, cpu_added_notifier);
> > > +
> > > +    icc_bridge_set_cpu_link(OBJECT(s), OBJECT(opaque));
> > > +}
> > > +
> > >  static void icc_bridge_initfn(Object *obj)
> > >  {
> > >      ICCBridgeState *s = ICC_BRIGDE(obj);
> > >      SysBusDevice *sb = SYS_BUS_DEVICE(obj);
> > >      ICCBus *ibus;
> > > +    int i;
> > >  
> > >      ibus = ICC_BUS(qbus_create(TYPE_ICC_BUS, DEVICE(obj), "icc-bus"));
> > >  
> > > @@ -85,12 +110,33 @@ static void icc_bridge_initfn(Object *obj)
> > >      memory_region_init(&s->ioapic_container, "icc-ioapic-container", 0x1000);
> > >      sysbus_init_mmio(sb, &s->ioapic_container);
> > >      ibus->ioapic_address_space = &s->ioapic_container;
> > > +
> > > +    s->links = g_malloc0(sizeof(Object *) * max_cpus);
> > > +    for (i = 0; i < max_cpus; i++) {
> > > +        gchar *cpu_name;
> > > +
> > > +        cpu_name = g_strdup_printf("cpu[%" PRIu32 "]",
> > > +                                   x86_cpu_apic_id_from_index(i));
> > > +        object_property_add_link(obj, cpu_name, TYPE_CPU, &s->links[i], NULL);
> > > +        g_free(cpu_name);
> > > +    }
> > > +
> > > +    s->cpu_added_notifier.notify = icc_bridge_cpu_added_req;
> > > +    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
> > > +}
> > > +
> > > +static void icc_bridge_fini(Object *obj)
> > > +{
> > > +    ICCBridgeState *s = ICC_BRIGDE(obj);
> > > +
> > > +    g_free(s->links);
> > >  }
> > >  
> > >  static const TypeInfo icc_bridge_info = {
> > >      .name  = "icc-bridge",
> > >      .parent = TYPE_SYS_BUS_DEVICE,
> > >      .instance_init  = icc_bridge_initfn,
> > > +    .instance_finalize  = icc_bridge_fini,
> > >      .instance_size  = sizeof(ICCBridgeState),
> > >  };
> > >  
> > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > index 6d5e164..ada235c 100644
> > > --- a/hw/i386/pc.c
> > > +++ b/hw/i386/pc.c
> > > @@ -870,7 +870,8 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
> > >      }
> > >  }
> > >  
> > > -static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
> > > +static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
> > > +                          SysBusDevice *icc_bridge, Error **errp)
> > >  {
> > >      X86CPU *cpu;
> > >  
> > > @@ -882,6 +883,10 @@ static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
> > >      object_property_set_int(OBJECT(cpu), apic_id, "apic-id", errp);
> > >      object_property_set_bool(OBJECT(cpu), true, "realized", errp);
> > >  
> > > +    if (icc_bridge != NULL) {
> > > +        icc_bridge_set_cpu_link(OBJECT(icc_bridge), OBJECT(cpu));
> > > +    }
> > > +
> > >      if (error_is_set(errp)) {
> > >          if (cpu != NULL) {
> > >              object_unref(OBJECT(cpu));
> > > @@ -911,7 +916,7 @@ void pc_cpus_init(const char *cpu_model)
> > >                                                   TYPE_ICC_BRIDGE, NULL));
> > >  
> > >      for (i = 0; i < smp_cpus; i++) {
> > > -        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), &error);
> > > +        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), ib, &error);
> > >          if (error) {
> > >              fprintf(stderr, "%s\n", error_get_pretty(error));
> > >              error_free(error);
> > > diff --git a/include/hw/i386/icc_bus.h b/include/hw/i386/icc_bus.h
> > > index 69a0278..bc31cd9 100644
> > > --- a/include/hw/i386/icc_bus.h
> > > +++ b/include/hw/i386/icc_bus.h
> > > @@ -49,5 +49,7 @@ typedef struct ICCDeviceClass {
> > >  
> > >  #define TYPE_ICC_BRIDGE "icc-bridge"
> > >  
> > > +void icc_bridge_set_cpu_link(Object *bridge, Object *cpu);
> > > +
> > >  #endif /* CONFIG_USER_ONLY */
> > >  #endif
> > > -- 
> > > 1.8.2
> > > 
> > 
> > -- 
> > Eduardo
> 
> 
> -- 
> Regards,
>   Igor
Igor Mammedov April 15, 2013, 2:15 p.m. UTC | #3
On Fri, 12 Apr 2013 09:44:09 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Fri, Apr 12, 2013 at 12:01:03PM +0200, Igor Mammedov wrote:
> > On Thu, 11 Apr 2013 14:19:37 -0300
> > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > 
> > > On Thu, Apr 11, 2013 at 04:51:57PM +0200, Igor Mammedov wrote:
> > > > ... and leave links for not present CPUs empty.
> > > > 
> > > > It will allow users to query for possible APIC IDs and use them
> > > > with cpu-add QMP command.
> > > > 
> > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > 
> > > I don't see anything wrong with having icc-bridge links as well, but I
> > > would really like to have a target-independent namespace with links,
> > > that could be used to query for the available/valid CPU IDs for cpu-add
> > > commands instead of icc-bridge. The IDs on that namespace could be
> > > considered completely opaque.
> > 
> > Considering that -numa in present state is not compatible with cpu-add
> > and that all CPU ID in this case are are sequence [0..maxcpus-1], this
> > patch could be dropped without any harm. libvirt could just use
> > numbers from this sequence like it's doing with current cpu_set without
> > any ID discovery. 
> 
> But it's not -numa that makes APIC ID probing necessary, it's
> non-power-of-2 core/thread counts on -smp (that make APIC IDs not match
> CPU indexes).
> 
> "Don't use CPU hotplug with -numa" is easy to be understood by users and
> by libvirt, but "don't use CPU hotplug with non-power-of-2 cores/threads
> counts" is harder to explain.
> 
> 
> > 
> > So, I've postponed target independent until we have -numa reworked,
> > then we could have /machine/node/socket/cpu containers with links.
> > The problem that needs to be solved, is the links storage ownership.
> > Who should allocate and own it? If machine was QOM object already,
> > I'd go with machine but it's not yet.
> 
> If we use CPU index as argument to cpu-add, we don't need to handle all
> those problems right now, we don't need to expose an APIC ID discovery
> interface, we make it work even with non-power-of-2 cores/threads
yes, you will get non-power-of-2 working without ID look-up.

> counts, and we make it work with -numa.
But you won't get this, since only the next non-plugged ID will work, due to
how cpu_index is allocated. You can't just overwrite it with a new value
without breaking the current code.

> 
> So, my big question is: why are we trying so hard to avoid using CPU
> indexes as argument to cpu-add, if it's so much easier, and it is an
> obvious solution that makes the interface target-independent without any
> extra effort?
Using cpu_index instead of APIC ID is definitely not effort-free and requires
quite a bit of rewriting of how it's currently used; APIC ID is a much easier
and less risky choice in this regard.

As for target-independence, any kind of ID is target-independent if treated as
opaque. Given that unplug will bring non-contiguous ID usage, an
interface to track which CPUs are plugged would be needed anyway. So it could
be introduced with this series and provide ID look-up in the meantime. That
would give libvirt time to actually start using it, and we could just remove
the non-contiguous ID restriction when unplug is ready, with all the necessary
infrastructure already around.

> 
> > 
> > > 
> > > > ---
> > > > v2:
> > > >  * s/get_firmware_id/get_arch_id/ due to rebase
> > > >  * rename cpu_add_notifier to cpu_added_notifier &
> > > >    icc_bridge_cpu_add_req -> icc_bridge_cpued_add_req
> > > > ---
> > > >  hw/cpu/icc_bus.c          | 46
> > > > ++++++++++++++++++++++++++++++++++++++++++++++
> > > > hw/i386/pc.c              |  9 +++++++-- include/hw/i386/icc_bus.h |
> > > > 2 ++ 3 files changed, 55 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c
> > > > index ab9623d..5c0b9d4 100644
> > > > --- a/hw/cpu/icc_bus.c
> > > > +++ b/hw/cpu/icc_bus.c
> > > > @@ -18,6 +18,7 @@
> > > >   */
> > > >  #include "hw/i386/icc_bus.h"
> > > >  #include "hw/sysbus.h"
> > > > +#include "sysemu/sysemu.h"
> > > >  
> > > >  static void icc_bus_initfn(Object *obj)
> > > >  {
> > > > @@ -61,15 +62,39 @@ typedef struct ICCBridgeState {
> > > >      SysBusDevice busdev;
> > > >      MemoryRegion apic_container;
> > > >      MemoryRegion ioapic_container;
> > > > +    Notifier cpu_added_notifier;
> > > > +    Object **links;
> > > >  } ICCBridgeState;
> > > >  #define ICC_BRIGDE(obj) OBJECT_CHECK(ICCBridgeState, (obj),
> > > > TYPE_ICC_BRIDGE) 
> > > >  
> > > > +void icc_bridge_set_cpu_link(Object *bridge, Object *cpu_obj)
> > > > +{
> > > > +    gchar *name;
> > > > +    Error *error = NULL;
> > > > +    CPUState *cpu = CPU(cpu_obj);
> > > > +    int64_t id = CPU_GET_CLASS(cpu)->get_arch_id(cpu);
> > > > +
> > > > +    name = g_strdup_printf("cpu[%" PRIu32 "]",
> > > > x86_cpu_apic_id_from_index(id));
> > > > +    object_property_set_link(bridge, cpu_obj, name, &error);
> > > > +    g_free(name);
> > > > +
> > > > +    g_assert(error == NULL);
> > > > +}
> > > > +
> > > > +static void icc_bridge_cpu_added_req(Notifier *n, void *opaque)
> > > > +{
> > > > +    ICCBridgeState *s = container_of(n, ICCBridgeState,
> > > > cpu_added_notifier); +
> > > > +    icc_bridge_set_cpu_link(OBJECT(s), OBJECT(opaque));
> > > > +}
> > > > +
> > > >  static void icc_bridge_initfn(Object *obj)
> > > >  {
> > > >      ICCBridgeState *s = ICC_BRIGDE(obj);
> > > >      SysBusDevice *sb = SYS_BUS_DEVICE(obj);
> > > >      ICCBus *ibus;
> > > > +    int i;
> > > >  
> > > >      ibus = ICC_BUS(qbus_create(TYPE_ICC_BUS, DEVICE(obj),
> > > > "icc-bus")); 
> > > > @@ -85,12 +110,33 @@ static void icc_bridge_initfn(Object *obj)
> > > >      memory_region_init(&s->ioapic_container, "icc-ioapic-container",
> > > > 0x1000); sysbus_init_mmio(sb, &s->ioapic_container);
> > > >      ibus->ioapic_address_space = &s->ioapic_container;
> > > > +
> > > > +    s->links = g_malloc0(sizeof(Object *) * max_cpus);
> > > > +    for (i = 0; i < max_cpus; i++) {
> > > > +        gchar *cpu_name;
> > > > +
> > > > +        cpu_name = g_strdup_printf("cpu[%" PRIu32 "]",
> > > > +                                   x86_cpu_apic_id_from_index(i));
> > > > +        object_property_add_link(obj, cpu_name, TYPE_CPU,
> > > > &s->links[i], NULL);
> > > > +        g_free(cpu_name);
> > > > +    }
> > > > +
> > > > +    s->cpu_added_notifier.notify = icc_bridge_cpu_added_req;
> > > > +    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
> > > > +}
> > > > +
> > > > +static void icc_bridge_fini(Object *obj)
> > > > +{
> > > > +    ICCBridgeState *s = ICC_BRIGDE(obj);
> > > > +
> > > > +    g_free(s->links);
> > > >  }
> > > >  
> > > >  static const TypeInfo icc_bridge_info = {
> > > >      .name  = "icc-bridge",
> > > >      .parent = TYPE_SYS_BUS_DEVICE,
> > > >      .instance_init  = icc_bridge_initfn,
> > > > +    .instance_finalize  = icc_bridge_fini,
> > > >      .instance_size  = sizeof(ICCBridgeState),
> > > >  };
> > > >  
> > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > index 6d5e164..ada235c 100644
> > > > --- a/hw/i386/pc.c
> > > > +++ b/hw/i386/pc.c
> > > > @@ -870,7 +870,8 @@ void pc_acpi_smi_interrupt(void *opaque, int irq,
> > > > int level) }
> > > >  }
> > > >  
> > > > -static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
> > > > Error **errp) +static X86CPU *pc_new_cpu(const char *cpu_model,
> > > > int64_t apic_id,
> > > > +                          SysBusDevice *icc_bridge, Error **errp)
> > > >  {
> > > >      X86CPU *cpu;
> > > >  
> > > > @@ -882,6 +883,10 @@ static X86CPU *pc_new_cpu(const char *cpu_model,
> > > > int64_t apic_id, Error **errp) object_property_set_int(OBJECT(cpu),
> > > > apic_id, "apic-id", errp); object_property_set_bool(OBJECT(cpu),
> > > > true, "realized", errp); 
> > > > +    if (icc_bridge != NULL) {
> > > > +        icc_bridge_set_cpu_link(OBJECT(icc_bridge), OBJECT(cpu));
> > > > +    }
> > > > +
> > > >      if (error_is_set(errp)) {
> > > >          if (cpu != NULL) {
> > > >              object_unref(OBJECT(cpu));
> > > > @@ -911,7 +916,7 @@ void pc_cpus_init(const char *cpu_model)
> > > >                                                   TYPE_ICC_BRIDGE,
> > > > NULL)); 
> > > >      for (i = 0; i < smp_cpus; i++) {
> > > > -        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
> > > > &error);
> > > > +        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
> > > > ib, &error); if (error) {
> > > >              fprintf(stderr, "%s\n", error_get_pretty(error));
> > > >              error_free(error);
> > > > diff --git a/include/hw/i386/icc_bus.h b/include/hw/i386/icc_bus.h
> > > > index 69a0278..bc31cd9 100644
> > > > --- a/include/hw/i386/icc_bus.h
> > > > +++ b/include/hw/i386/icc_bus.h
> > > > @@ -49,5 +49,7 @@ typedef struct ICCDeviceClass {
> > > >  
> > > >  #define TYPE_ICC_BRIDGE "icc-bridge"
> > > >  
> > > > +void icc_bridge_set_cpu_link(Object *bridge, Object *cpu);
> > > > +
> > > >  #endif /* CONFIG_USER_ONLY */
> > > >  #endif
> > > > -- 
> > > > 1.8.2
> > > > 
> > > 
> > > -- 
> > > Eduardo
> > 
> > 
> > -- 
> > Regards,
> >   Igor
>
Eduardo Habkost April 15, 2013, 2:48 p.m. UTC | #4
On Mon, Apr 15, 2013 at 04:15:08PM +0200, Igor Mammedov wrote:
> On Fri, 12 Apr 2013 09:44:09 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > On Fri, Apr 12, 2013 at 12:01:03PM +0200, Igor Mammedov wrote:
> > > On Thu, 11 Apr 2013 14:19:37 -0300
> > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > 
> > > > On Thu, Apr 11, 2013 at 04:51:57PM +0200, Igor Mammedov wrote:
> > > > > ... and leave links for not present CPUs empty.
> > > > > 
> > > > > It will allow users to query for possible APIC IDs and use them
> > > > > with cpu-add QMP command.
> > > > > 
> > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > 
> > > > I don't see anything wrong with having icc-bridge links as well, but I
> > > > would really like to have a target-independent namespace with links,
> > > > that could be used to query for the available/valid CPU IDs for cpu-add
> > > > commands instead of icc-bridge. The IDs on that namespace could be
> > > > considered completely opaque.
> > > 
> > > Considering that -numa in present state is not compatible with cpu-add
> > > and that all CPU ID in this case are are sequence [0..maxcpus-1], this
> > > patch could be dropped without any harm. libvirt could just use
> > > numbers from this sequence like it's doing with current cpu_set without
> > > any ID discovery. 
> > 
> > But it's not -numa that makes APIC ID probing necessary, it's
> > non-power-of-2 core/thread counts on -smp (that make APIC IDs not match
> > CPU indexes).
> > 
> > "Don't use CPU hotplug with -numa" is easy to be understood by users and
> > by libvirt, but "don't use CPU hotplug with non-power-of-2 cores/threads
> > counts" is harder to explain.
> > 
> > 
> > > 
> > > So, I've postponed target independent until we have -numa reworked,
> > > then we could have /machine/node/socket/cpu containers with links.
> > > The problem that needs to be solved, is the links storage ownership.
> > > Who should allocate and own it? If machine was QOM object already,
> > > I'd go with machine but it's not yet.
> > 
> > If we use CPU index as argument to cpu-add, we don't need to handle all
> > those problems right now, we don't need to expose an APIC ID discovery
> > interface, we make it work even with non-power-of-2 cores/threads
> yes, you will get non-power-of-2 working without ID look-up.
> 
> > counts, and we make it work with -numa.
> But you won't get this since, only next non-plugged ID will work, due to how
> cpu_index is allocated. You can't just overwrite it with new value without
> breaking current code.

OK, I think I get it: the problem is the cpu_index field specifically,
because it is set automatically, and there may be lots of assumptions in
the code about it. Maybe I should rephrase my suggestion: let's replace
"CPU index" above with "a predictable ID allocation system where IDs
will be in the range 0..(max_cpus-1)". We don't even need to touch the
cpu_index field in the CPU objects if we think this is too risky.

I am just proposing that we use IDs so that we just need to add a
   apic_id = apic_id_for_cpu_index(id)
line at the beginning of the cpu-add implementation, and everything else
would look exactly the same.

Nothing else would change in your implementation, except that now we
won't need a ID lookup system for 1.5 because libvirt can assume that
the next available CPU ID will be smp_cpus+1.

There would be no requirement to make the index-based IDs contiguous,
just like there's no requirement to make APIC IDs contiguous. The only
difference is that CPU indexes will be predictable IDs that will always be
in the range 0..(max_cpus-1) and won't require an ID lookup mechanism.
And it will work with non-power-of-2 cores/threads counts, and it will work
with -numa.

> 
> > 
> > So, my big question is: why are we trying so hard to avoid using CPU
> > indexes as argument to cpu-add, if it's so much easier, and it is an
> > obvious solution that makes the interface target-independent without any
> > extra effort?
> Using cpu_index instead of APIC ID definitely is not effort free and requires
> quite a bit of rewrite how its used currently, APIC ID is much much easier
> and less risky choice in this regard.
> 
> As for target-independence, any kind of ID is target-independent if treated as
> opaque.

True, as long as we have a target-independent ID lookup system, which we
don't have today. That's the part where we would need less effort.


> Given that with unplug should come not-contiguous ID usage, the
> interface to track which CPUs are plugged would be needed anyway. So it could
> be introduced with this series and provide ID look-up meanwhile. That would
> give libvirt time actually to start using it, and just remove not-contiguous
> ID restriction when unplug is ready with all necessary infrastructure
> already around.

Let's rephrase my suggestion: I don't think we should use the internal
"cpu->cpu_index" field as ID, necessarily. I only suggest that we make
the IDs predictably in the range 0..(max_cpus-1) so we:
1) don't need an ID lookup mechanism;
2) keep it compatible with the existing -numa options.

If we really want to use APIC ID one day, we may implement a lookup
mechanism that will provide IDs to libvirt, and declare the "IDs between
0..(max_cpus-1)" assumption as deprecated, so we can start using APIC
IDs as the (opaque) CPU IDs in the future. But before we do that, we
would implement a better "-numa" interface first.

You even suggested that we did something similar, above:

> > > Considering that -numa in present state is not compatible with cpu-add
> > > and that all CPU ID in this case are are sequence [0..maxcpus-1], this
> > > patch could be dropped without any harm. libvirt could just use
> > > numbers from this sequence like it's doing with current cpu_set without
> > > any ID discovery. 

What I suggest is that we let libvirt make exactly the same assumptions
you suggest, but if we add a single line to the code:
    apic_id = apic_id_for_cpu_index(id)
we will make this intermediate solution work with NUMA _and_ work with
non-power-of-2 cores/threads counts.

The other cases (non-numa with power-of-2 cores/threads counts) would
work exactly the same, because in those cases
apic_id_for_cpu_index(id) == id.
Igor Mammedov April 15, 2013, 3:16 p.m. UTC | #5
On Mon, 15 Apr 2013 11:48:45 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Mon, Apr 15, 2013 at 04:15:08PM +0200, Igor Mammedov wrote:
> > On Fri, 12 Apr 2013 09:44:09 -0300
> > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > 
> > > On Fri, Apr 12, 2013 at 12:01:03PM +0200, Igor Mammedov wrote:
> > > > On Thu, 11 Apr 2013 14:19:37 -0300
> > > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > > 
> > > > > On Thu, Apr 11, 2013 at 04:51:57PM +0200, Igor Mammedov wrote:
> > > > > > ... and leave links for not present CPUs empty.
> > > > > > 
> > > > > > It will allow users to query for possible APIC IDs and use them
> > > > > > with cpu-add QMP command.
> > > > > > 
> > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > 
> > > > > I don't see anything wrong with having icc-bridge links as well,
> > > > > but I would really like to have a target-independent namespace with
> > > > > links, that could be used to query for the available/valid CPU IDs
> > > > > for cpu-add commands instead of icc-bridge. The IDs on that
> > > > > namespace could be considered completely opaque.
> > > > 
> > > > Considering that -numa in present state is not compatible with cpu-add
> > > > and that all CPU ID in this case are are sequence [0..maxcpus-1], this
> > > > patch could be dropped without any harm. libvirt could just use
> > > > numbers from this sequence like it's doing with current cpu_set
> > > > without any ID discovery. 
> > > 
> > > But it's not -numa that makes APIC ID probing necessary, it's
> > > non-power-of-2 core/thread counts on -smp (that make APIC IDs not match
> > > CPU indexes).
> > > 
> > > "Don't use CPU hotplug with -numa" is easy to be understood by users and
> > > by libvirt, but "don't use CPU hotplug with non-power-of-2 cores/threads
> > > counts" is harder to explain.
> > > 
> > > 
> > > > 
> > > > So, I've postponed target independent until we have -numa reworked,
> > > > then we could have /machine/node/socket/cpu containers with links.
> > > > The problem that needs to be solved, is the links storage ownership.
> > > > Who should allocate and own it? If machine was QOM object already,
> > > > I'd go with machine but it's not yet.
> > > 
> > > If we use CPU index as argument to cpu-add, we don't need to handle all
> > > those problems right now, we don't need to expose an APIC ID discovery
> > > interface, we make it work even with non-power-of-2 cores/threads
> > yes, you will get non-power-of-2 working without ID look-up.
> > 
> > > counts, and we make it work with -numa.
> > But you won't get this since, only next non-plugged ID will work, due to
> > how cpu_index is allocated. You can't just overwrite it with new value
> > without breaking current code.
> 
> OK, I think I get it: the problem is the cpu_index field specifically,
> because it is set automatically, and there may be lots of assumptions in
> the code about it. Maybe I should rephrase my suggestion: let's replace
> "CPU index" above with "a predictable ID allocation system where IDs
> will be in the range 0..(max_cpus-1)". We don't even need to touch the
> cpu_index field in the CPU objects if we think this is too risky.
> 
> I am just proposing that we use IDs so that we just need to add a
>    apic_id = apic_id_for_cpu_index(id)
> line at the beginning of the cpu-add implementation, and everything else
> would look exactly the same.
> 
> Nothing else would change in your implementation, except that now we
> won't need an ID lookup system for 1.5 because libvirt can assume that
> the next available CPU ID will be smp_cpus+1.
> 
> There would be no requirement to make the index-based IDs contiguous,
> just like there's no requirement to make APIC IDs contiguous. The only
> difference is that CPU indexes will be predictable IDs, that will be always
> in the range 0..(max_cpus-1) and won't require an ID lookup mechanism.
> And it will work with non-power-of-2 threads/counts, and it will work
> with -numa.
> 
> > 
> > > 
> > > So, my big question is: why are we trying so hard to avoid using CPU
> > > indexes as argument to cpu-add, if it's so much easier, and it is an
> > > obvious solution that makes the interface target-independent without any
> > > extra effort?
> > Using cpu_index instead of APIC ID definitely is not effort free and
> > requires quite a bit of rewrite how its used currently, APIC ID is much
> > much easier and less risky choice in this regard.
> > 
> > As for target-independence, any kind of ID is target-independent if
> > treated as opaque.
> 
> True, as long as we have a target-independent ID lookup system, which we
> don't have today. That's the part where we would need less effort.
> 
> 
> > Given that with unplug should come not-contiguous ID usage, the
> > interface to track which CPUs are plugged would be needed anyway. So it
> > could be introduced with this series and provide ID look-up meanwhile.
> > That would give libvirt time actually to start using it, and just remove
> > not-contiguous ID restriction when unplug is ready with all necessary
> > infrastructure already around.
> 
> Let's rephrase my suggestion: I don't think we should use the internal
> "cpu->cpu_index" field as ID, necessarily. I only suggest that we make
> the IDs predictably in the range 0..(max_cpus-1) so we:
> 1) don't need an ID lookup mechanism;
> 2) keep it compatible with the existing -numa options.
> 
> If we really want to use APIC ID one day, we may implement a lookup
> mechanism that will provide IDs to libvirt, and declare the "IDs between
> 0..(max_cpus-1)" assumption as deprecated, so we can start using APIC
> IDs as the (opaque) CPU IDs in the future. But before we do that, we
> would implement a better "-numa" interface first.
> 
> You even suggested that we did something similar, above:
> 
> > > > Considering that -numa in present state is not compatible with cpu-add
> > > > > and that all CPU IDs in this case are a sequence [0..maxcpus-1], this
> > > > patch could be dropped without any harm. libvirt could just use
> > > > numbers from this sequence like it's doing with current cpu_set
> > > > without any ID discovery. 
> 
> What I suggest is that we let libvirt make exactly the same assumptions
> you suggest, but if we add a single line to the code:
>     apic_id = apic_id_for_cpu_index(id)
> we will make this intermediate solution work with NUMA _and_ work with
> non-power-of-2 cores/threads counts.
> 
> The other cases (non-numa with power-of-2 cores/threads counts) would
> work exactly the same, because in those cases
> apic_id_for_cpu_index(id) == id.
> 

Rephrasing suggestion:
drop 18/19 and use apic_id = apic_id_for_cpu_index(id) in 19/19
Am I correct?
Eduardo Habkost April 15, 2013, 3:26 p.m. UTC | #6
On Mon, Apr 15, 2013 at 05:16:26PM +0200, Igor Mammedov wrote:
> On Mon, 15 Apr 2013 11:48:45 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > On Mon, Apr 15, 2013 at 04:15:08PM +0200, Igor Mammedov wrote:
> > > On Fri, 12 Apr 2013 09:44:09 -0300
> > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > 
> > > > On Fri, Apr 12, 2013 at 12:01:03PM +0200, Igor Mammedov wrote:
> > > > > On Thu, 11 Apr 2013 14:19:37 -0300
> > > > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > > > 
> > > > > > On Thu, Apr 11, 2013 at 04:51:57PM +0200, Igor Mammedov wrote:
> > > > > > > ... and leave links for not present CPUs empty.
> > > > > > > 
> > > > > > > It will allow users to query for possible APIC IDs and use them
> > > > > > > with cpu-add QMP command.
> > > > > > > 
> > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > 
> > > > > > I don't see anything wrong with having icc-bridge links as well,
> > > > > > but I would really like to have a target-independent namespace with
> > > > > > links, that could be used to query for the available/valid CPU IDs
> > > > > > for cpu-add commands instead of icc-bridge. The IDs on that
> > > > > > namespace could be considered completely opaque.
> > > > > 
> > > > > Considering that -numa in present state is not compatible with cpu-add
> > > > > > and that all CPU IDs in this case are a sequence [0..maxcpus-1], this
> > > > > patch could be dropped without any harm. libvirt could just use
> > > > > numbers from this sequence like it's doing with current cpu_set
> > > > > without any ID discovery. 
> > > > 
> > > > But it's not -numa that makes APIC ID probing necessary, it's
> > > > non-power-of-2 core/thread counts on -smp (that make APIC IDs not match
> > > > CPU indexes).
> > > > 
> > > > "Don't use CPU hotplug with -numa" is easy to be understood by users and
> > > > by libvirt, but "don't use CPU hotplug with non-power-of-2 cores/threads
> > > > counts" is harder to explain.
> > > > 
> > > > 
> > > > > 
> > > > > So, I've postponed target independent until we have -numa reworked,
> > > > > then we could have /machine/node/socket/cpu containers with links.
> > > > > The problem that needs to be solved, is the links storage ownership.
> > > > > Who should allocate and own it? If machine was QOM object already,
> > > > > I'd go with machine but it's not yet.
> > > > 
> > > > If we use CPU index as argument to cpu-add, we don't need to handle all
> > > > those problems right now, we don't need to expose an APIC ID discovery
> > > > interface, we make it work even with non-power-of-2 cores/threads
> > > yes, you will get non-power-of-2 working without ID look-up.
> > > 
> > > > counts, and we make it work with -numa.
> > > But you won't get this since, only next non-plugged ID will work, due to
> > > how cpu_index is allocated. You can't just overwrite it with new value
> > > without breaking current code.
> > 
> > OK, I think I get it: the problem is the cpu_index field specifically,
> > because it is set automatically, and there may be lots of assumptions in
> > the code about it. Maybe I should rephrase my suggestion: let's replace
> > "CPU index" above with "a predictable ID allocation system where IDs
> > will be in the range 0..(max_cpus-1)". We don't even need to touch the
> > cpu_index field in the CPU objects if we think this is too risky.
> > 
> > I am just proposing that we use IDs so that we just need to add a
> >    apic_id = apic_id_for_cpu_index(id)
> > line at the beginning of the cpu-add implementation, and everything else
> > would look exactly the same.
> > 
> > Nothing else would change in your implementation, except that now we
> > won't need an ID lookup system for 1.5 because libvirt can assume that
> > the next available CPU ID will be smp_cpus+1.
> > 
> > There would be no requirement to make the index-based IDs contiguous,
> > just like there's no requirement to make APIC IDs contiguous. The only
> > difference is that CPU indexes will be predictable IDs, that will be always
> > in the range 0..(max_cpus-1) and won't require an ID lookup mechanism.
> > And it will work with non-power-of-2 threads/counts, and it will work
> > with -numa.
> > 
> > > 
> > > > 
> > > > So, my big question is: why are we trying so hard to avoid using CPU
> > > > indexes as argument to cpu-add, if it's so much easier, and it is an
> > > > obvious solution that makes the interface target-independent without any
> > > > extra effort?
> > > Using cpu_index instead of APIC ID definitely is not effort free and
> > > requires quite a bit of rewrite how its used currently, APIC ID is much
> > > much easier and less risky choice in this regard.
> > > 
> > > As for target-independence, any kind of ID is target-independent if
> > > treated as opaque.
> > 
> > True, as long as we have a target-independent ID lookup system, which we
> > don't have today. That's the part where we would need less effort.
> > 
> > 
> > > Given that with unplug should come not-contiguous ID usage, the
> > > interface to track which CPUs are plugged would be needed anyway. So it
> > > could be introduced with this series and provide ID look-up meanwhile.
> > > That would give libvirt time actually to start using it, and just remove
> > > not-contiguous ID restriction when unplug is ready with all necessary
> > > infrastructure already around.
> > 
> > Let's rephrase my suggestion: I don't think we should use the internal
> > "cpu->cpu_index" field as ID, necessarily. I only suggest that we make
> > the IDs predictably in the range 0..(max_cpus-1) so we:
> > 1) don't need an ID lookup mechanism;
> > 2) keep it compatible with the existing -numa options.
> > 
> > If we really want to use APIC ID one day, we may implement a lookup
> > mechanism that will provide IDs to libvirt, and declare the "IDs between
> > 0..(max_cpus-1)" assumption as deprecated, so we can start using APIC
> > IDs as the (opaque) CPU IDs in the future. But before we do that, we
> > would implement a better "-numa" interface first.
> > 
> > You even suggested that we did something similar, above:
> > 
> > > > > Considering that -numa in present state is not compatible with cpu-add
> > > > > > and that all CPU IDs in this case are a sequence [0..maxcpus-1], this
> > > > > patch could be dropped without any harm. libvirt could just use
> > > > > numbers from this sequence like it's doing with current cpu_set
> > > > > without any ID discovery. 
> > 
> > What I suggest is that we let libvirt make exactly the same assumptions
> > you suggest, but if we add a single line to the code:
> >     apic_id = apic_id_for_cpu_index(id)
> > we will make this intermediate solution work with NUMA _and_ work with
> > non-power-of-2 cores/threads counts.
> > 
> > The other cases (non-numa with power-of-2 cores/threads counts) would
> > work exactly the same, because in those cases
> > apic_id_for_cpu_index(id) == id.
> > 
> 
> Rephrasing suggestion:
> drop 18/19 and use apic_id = apic_id_for_cpu_index(id) in 19/19
> Am I correct?

Exactly. I mean: I expect that to be enough and not require other
changes, but I didn't look at all patches line-by-line yet.
Igor Mammedov April 15, 2013, 8:37 p.m. UTC | #7
On Mon, 15 Apr 2013 12:26:07 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Mon, Apr 15, 2013 at 05:16:26PM +0200, Igor Mammedov wrote:
> > On Mon, 15 Apr 2013 11:48:45 -0300
> > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > 
> > > On Mon, Apr 15, 2013 at 04:15:08PM +0200, Igor Mammedov wrote:
> > > > On Fri, 12 Apr 2013 09:44:09 -0300
> > > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > > 
> > > > > On Fri, Apr 12, 2013 at 12:01:03PM +0200, Igor Mammedov wrote:
> > > > > > On Thu, 11 Apr 2013 14:19:37 -0300
> > > > > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > > > > 
> > > > > > > On Thu, Apr 11, 2013 at 04:51:57PM +0200, Igor Mammedov wrote:
> > > > > > > > ... and leave links for not present CPUs empty.
> > > > > > > > 
> > > > > > > > It will allow users to query for possible APIC IDs and use them
> > > > > > > > with cpu-add QMP command.
> > > > > > > > 
> > > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > > 
> > > > > > > I don't see anything wrong with having icc-bridge links as well,
> > > > > > > but I would really like to have a target-independent namespace with
> > > > > > > links, that could be used to query for the available/valid CPU IDs
> > > > > > > for cpu-add commands instead of icc-bridge. The IDs on that
> > > > > > > namespace could be considered completely opaque.
> > > > > > 
> > > > > > Considering that -numa in present state is not compatible with cpu-add
> > > > > > > and that all CPU IDs in this case are a sequence [0..maxcpus-1], this
> > > > > > patch could be dropped without any harm. libvirt could just use
> > > > > > numbers from this sequence like it's doing with current cpu_set
> > > > > > without any ID discovery. 
> > > > > 
> > > > > But it's not -numa that makes APIC ID probing necessary, it's
> > > > > non-power-of-2 core/thread counts on -smp (that make APIC IDs not match
> > > > > CPU indexes).
> > > > > 
> > > > > "Don't use CPU hotplug with -numa" is easy to be understood by users and
> > > > > by libvirt, but "don't use CPU hotplug with non-power-of-2 cores/threads
> > > > > counts" is harder to explain.
> > > > > 
> > > > > 
> > > > > > 
> > > > > > So, I've postponed target independent until we have -numa reworked,
> > > > > > then we could have /machine/node/socket/cpu containers with links.
> > > > > > The problem that needs to be solved, is the links storage ownership.
> > > > > > Who should allocate and own it? If machine was QOM object already,
> > > > > > I'd go with machine but it's not yet.
> > > > > 
> > > > > If we use CPU index as argument to cpu-add, we don't need to handle all
> > > > > those problems right now, we don't need to expose an APIC ID discovery
> > > > > interface, we make it work even with non-power-of-2 cores/threads
> > > > yes, you will get non-power-of-2 working without ID look-up.
> > > > 
> > > > > counts, and we make it work with -numa.
> > > > But you won't get this since, only next non-plugged ID will work, due to
> > > > how cpu_index is allocated. You can't just overwrite it with new value
> > > > without breaking current code.
> > > 
> > > OK, I think I get it: the problem is the cpu_index field specifically,
> > > because it is set automatically, and there may be lots of assumptions in
> > > the code about it. Maybe I should rephrase my suggestion: let's replace
> > > "CPU index" above with "a predictable ID allocation system where IDs
> > > will be in the range 0..(max_cpus-1)". We don't even need to touch the
> > > cpu_index field in the CPU objects if we think this is too risky.
> > > 
> > > I am just proposing that we use IDs so that we just need to add a
> > >    apic_id = apic_id_for_cpu_index(id)
> > > line at the beginning of the cpu-add implementation, and everything else
> > > would look exactly the same.
> > > 
> > > Nothing else would change in your implementation, except that now we
> > > won't need an ID lookup system for 1.5 because libvirt can assume that
> > > the next available CPU ID will be smp_cpus+1.
> > > 
> > > There would be no requirement to make the index-based IDs contiguous,
> > > just like there's no requirement to make APIC IDs contiguous. The only
> > > difference is that CPU indexes will be predictable IDs, that will be always
> > > in the range 0..(max_cpus-1) and won't require an ID lookup mechanism.
> > > And it will work with non-power-of-2 threads/counts, and it will work
> > > with -numa.
> > > 
> > > > 
> > > > > 
> > > > > So, my big question is: why are we trying so hard to avoid using CPU
> > > > > indexes as argument to cpu-add, if it's so much easier, and it is an
> > > > > obvious solution that makes the interface target-independent without any
> > > > > extra effort?
> > > > Using cpu_index instead of APIC ID definitely is not effort free and
> > > > requires quite a bit of rewrite how its used currently, APIC ID is much
> > > > much easier and less risky choice in this regard.
> > > > 
> > > > As for target-independence, any kind of ID is target-independent if
> > > > treated as opaque.
> > > 
> > > True, as long as we have a target-independent ID lookup system, which we
> > > don't have today. That's the part where we would need less effort.
> > > 
> > > 
> > > > Given that with unplug should come not-contiguous ID usage, the
> > > > interface to track which CPUs are plugged would be needed anyway. So it
> > > > could be introduced with this series and provide ID look-up meanwhile.
> > > > That would give libvirt time actually to start using it, and just remove
> > > > not-contiguous ID restriction when unplug is ready with all necessary
> > > > infrastructure already around.
> > > 
> > > Let's rephrase my suggestion: I don't think we should use the internal
> > > "cpu->cpu_index" field as ID, necessarily. I only suggest that we make
> > > the IDs predictably in the range 0..(max_cpus-1) so we:
> > > 1) don't need an ID lookup mechanism;
> > > 2) keep it compatible with the existing -numa options.
> > > 
> > > If we really want to use APIC ID one day, we may implement a lookup
> > > mechanism that will provide IDs to libvirt, and declare the "IDs between
> > > 0..(max_cpus-1)" assumption as deprecated, so we can start using APIC
> > > IDs as the (opaque) CPU IDs in the future. But before we do that, we
> > > would implement a better "-numa" interface first.
> > > 
> > > You even suggested that we did something similar, above:
> > > 
> > > > > > Considering that -numa in present state is not compatible with cpu-add
> > > > > > > and that all CPU IDs in this case are a sequence [0..maxcpus-1], this
> > > > > > patch could be dropped without any harm. libvirt could just use
> > > > > > numbers from this sequence like it's doing with current cpu_set
> > > > > > without any ID discovery. 
> > > 
> > > What I suggest is that we let libvirt make exactly the same assumptions
> > > you suggest, but if we add a single line to the code:
> > >     apic_id = apic_id_for_cpu_index(id)
> > > we will make this intermediate solution work with NUMA _and_ work with
> > > non-power-of-2 cores/threads counts.
> > > 
> > > The other cases (non-numa with power-of-2 cores/threads counts) would
> > > work exactly the same, because in those cases
> > > apic_id_for_cpu_index(id) == id.
> > > 
> > 
> > Rephrasing suggestion:
> > drop 18/19 and use apic_id = apic_id_for_cpu_index(id) in 19/19
> > Am I correct?
> 
> Exactly. I mean: I expect that to be enough and not require other
> changes, but I didn't look at all patches line-by-line yet.
Ok, I'm dropping it and submitting amended series,

Thanks for review.
> 
> -- 
> Eduardo
>
diff mbox

Patch

diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c
index ab9623d..5c0b9d4 100644
--- a/hw/cpu/icc_bus.c
+++ b/hw/cpu/icc_bus.c
@@ -18,6 +18,7 @@ 
  */
 #include "hw/i386/icc_bus.h"
 #include "hw/sysbus.h"
+#include "sysemu/sysemu.h"
 
 static void icc_bus_initfn(Object *obj)
 {
@@ -61,15 +62,39 @@  typedef struct ICCBridgeState {
     SysBusDevice busdev;
     MemoryRegion apic_container;
     MemoryRegion ioapic_container;
+    Notifier cpu_added_notifier;
+    Object **links;
 } ICCBridgeState;
 #define ICC_BRIGDE(obj) OBJECT_CHECK(ICCBridgeState, (obj), TYPE_ICC_BRIDGE)
 
 
+void icc_bridge_set_cpu_link(Object *bridge, Object *cpu_obj)
+{
+    gchar *name;
+    Error *error = NULL;
+    CPUState *cpu = CPU(cpu_obj);
+    int64_t id = CPU_GET_CLASS(cpu)->get_arch_id(cpu);
+
+    name = g_strdup_printf("cpu[%" PRIu32 "]", x86_cpu_apic_id_from_index(id));
+    object_property_set_link(bridge, cpu_obj, name, &error);
+    g_free(name);
+
+    g_assert(error == NULL);
+}
+
+static void icc_bridge_cpu_added_req(Notifier *n, void *opaque)
+{
+    ICCBridgeState *s = container_of(n, ICCBridgeState, cpu_added_notifier);
+
+    icc_bridge_set_cpu_link(OBJECT(s), OBJECT(opaque));
+}
+
 static void icc_bridge_initfn(Object *obj)
 {
     ICCBridgeState *s = ICC_BRIGDE(obj);
     SysBusDevice *sb = SYS_BUS_DEVICE(obj);
     ICCBus *ibus;
+    int i;
 
     ibus = ICC_BUS(qbus_create(TYPE_ICC_BUS, DEVICE(obj), "icc-bus"));
 
@@ -85,12 +110,33 @@  static void icc_bridge_initfn(Object *obj)
     memory_region_init(&s->ioapic_container, "icc-ioapic-container", 0x1000);
     sysbus_init_mmio(sb, &s->ioapic_container);
     ibus->ioapic_address_space = &s->ioapic_container;
+
+    s->links = g_malloc0(sizeof(Object *) * max_cpus);
+    for (i = 0; i < max_cpus; i++) {
+        gchar *cpu_name;
+
+        cpu_name = g_strdup_printf("cpu[%" PRIu32 "]",
+                                   x86_cpu_apic_id_from_index(i));
+        object_property_add_link(obj, cpu_name, TYPE_CPU, &s->links[i], NULL);
+        g_free(cpu_name);
+    }
+
+    s->cpu_added_notifier.notify = icc_bridge_cpu_added_req;
+    qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
+}
+
+static void icc_bridge_fini(Object *obj)
+{
+    ICCBridgeState *s = ICC_BRIGDE(obj);
+
+    g_free(s->links);
 }
 
 static const TypeInfo icc_bridge_info = {
     .name  = "icc-bridge",
     .parent = TYPE_SYS_BUS_DEVICE,
     .instance_init  = icc_bridge_initfn,
+    .instance_finalize  = icc_bridge_fini,
     .instance_size  = sizeof(ICCBridgeState),
 };
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 6d5e164..ada235c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -870,7 +870,8 @@  void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
     }
 }
 
-static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
+static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
+                          SysBusDevice *icc_bridge, Error **errp)
 {
     X86CPU *cpu;
 
@@ -882,6 +883,10 @@  static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, Error **errp)
     object_property_set_int(OBJECT(cpu), apic_id, "apic-id", errp);
     object_property_set_bool(OBJECT(cpu), true, "realized", errp);
 
+    if (icc_bridge != NULL) {
+        icc_bridge_set_cpu_link(OBJECT(icc_bridge), OBJECT(cpu));
+    }
+
     if (error_is_set(errp)) {
         if (cpu != NULL) {
             object_unref(OBJECT(cpu));
@@ -911,7 +916,7 @@  void pc_cpus_init(const char *cpu_model)
                                                  TYPE_ICC_BRIDGE, NULL));
 
     for (i = 0; i < smp_cpus; i++) {
-        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), &error);
+        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), ib, &error);
         if (error) {
             fprintf(stderr, "%s\n", error_get_pretty(error));
             error_free(error);
diff --git a/include/hw/i386/icc_bus.h b/include/hw/i386/icc_bus.h
index 69a0278..bc31cd9 100644
--- a/include/hw/i386/icc_bus.h
+++ b/include/hw/i386/icc_bus.h
@@ -49,5 +49,7 @@  typedef struct ICCDeviceClass {
 
 #define TYPE_ICC_BRIDGE "icc-bridge"
 
+void icc_bridge_set_cpu_link(Object *bridge, Object *cpu);
+
 #endif /* CONFIG_USER_ONLY */
 #endif