Patchwork [27/27] pc: generate APIC IDs according to CPU topology

login
register
mail settings
Submitter Eduardo Habkost
Date Oct. 24, 2012, 5:50 p.m.
Message ID <1351101001-14589-28-git-send-email-ehabkost@redhat.com>
Download mbox | patch
Permalink /patch/193920/
State New
Headers show

Comments

Eduardo Habkost - Oct. 24, 2012, 5:50 p.m.
This keeps compatibility on machine-types pc-1.2 and older, and prints a
warning in case the requested configuration won't get the correct
topology.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
Changes v1 -> v2:
 - Move code to cpu.c
 - keep using cpu_index on *-user
 - Use SMP.contiguous_apic_ids global property
 - Prints warning in case the compatibility mode will expose incorrect
   topology

Changes v2 -> v3:
 - Now all code is inside hw/pc.c
 - Use a real "PC" class and a "contiguous_apic_ids" property

Changes v3 -> v4:
 - Instead of using a global property, use a separate machine init
   function and a PCInitArgs field, to implement compatibility mode
 - Use error_report() instead of fprintf(stderr) for the warning
 - Use a field on PCInitArgs instead of a static variable to check
   if warning was already printed
---
 hw/pc.c      | 19 +++++++++++++++----
 hw/pc.h      |  6 ++++++
 hw/pc_piix.c |  1 +
 3 files changed, 22 insertions(+), 4 deletions(-)
Igor Mammedov - Nov. 1, 2012, 2:46 p.m.
On Wed, 24 Oct 2012 15:50:01 -0200
Eduardo Habkost <ehabkost@redhat.com> wrote:

> This keeps compatibility on machine-types pc-1.2 and older, and prints a
> warning in case the requested configuration won't get the correct
> topology.
> 
> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> ---
> Changes v1 -> v2:
>  - Move code to cpu.c
>  - keep using cpu_index on *-user
>  - Use SMP.contiguous_apic_ids global property
>  - Prints warning in case the compatibility mode will expose incorrect
>    topology
> 
> Changes v2 -> v3:
>  - Now all code is inside hw/pc.c
>  - Use a real "PC" class and a "contiguous_apic_ids" property
> 
> Changes v3 -> v4:
>  - Instead of using a global property, use a separate machine init
>    function and a PCInitArgs field, to implement compatibility mode
>  - Use error_report() instead of fprintf(stderr) for the warning
>  - Use a field on PCInitArgs instead of a static variable to check
>    if warning was already printed
> ---
>  hw/pc.c      | 19 +++++++++++++++----
>  hw/pc.h      |  6 ++++++
>  hw/pc_piix.c |  1 +
>  3 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/pc.c b/hw/pc.c
> index 8895087..3d08271 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -51,6 +51,8 @@
>  #include "exec-memory.h"
>  #include "arch_init.h"
>  #include "bitmap.h"
> +#include "topology.h"
> +#include "cpus.h"
>  
>  /* debug PC/ISA interrupts */
>  //#define DEBUG_IRQ
> @@ -562,10 +564,19 @@ static void bochs_bios_write(void *opaque, uint32_t
> addr, uint32_t val) */
>  static uint32_t apic_id_for_cpu(PCInitArgs *args, int cpu_index)
>  {
> -    /* right now APIC ID == CPU index. this will eventually change to use
> -     * the CPU topology configuration properly
> -     */
> -    return cpu_index;
> +    uint32_t correct_id;
> +
> +    correct_id = topo_apicid_for_cpu(smp_cores, smp_threads, cpu_index);
> +    if (args->compat_contiguous_apic_ids) {
> +        if (cpu_index != correct_id && !args->apic_id_warned) {
> +            error_report("APIC IDs set in compatibility mode, "
> +                         "CPU topology won't match the configuration");
> +            args->apic_id_warned = true;
> +        }
> +        return cpu_index;
> +    } else {
> +        return correct_id;
> +    }
>  }
>  
>  int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
> diff --git a/hw/pc.h b/hw/pc.h
> index 53883f5..a14944a 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -81,6 +81,12 @@ typedef struct PCInitArgs {
>      QEMUMachineInitArgs *qemu_args;
>      bool pci_enabled;
>      bool kvmclock_enabled;
> +    /* Enable compatibility (buggy) APIC ID generation, that keep APIC IDs
> +     * contiguous
> +     */
> +    bool compat_contiguous_apic_ids;
> +    /* Warning message about incorrect APIC ID was shown */
> +    bool apic_id_warned;
>  
>      /* Memory regions & sizes: */
>      MemoryRegion *system_memory;
> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> index 57a3228..79a54e6 100644
> --- a/hw/pc_piix.c
> +++ b/hw/pc_piix.c
> @@ -298,6 +298,7 @@ static void pc_init_pci_v1_2(QEMUMachineInitArgs *args)
>          .qemu_args = args,
>          .pci_enabled = true,
>          .kvmclock_enabled = true,
> +        .compat_contiguous_apic_ids = true,
I suppose libvirt/whatever would know that it's starting v1_2 machine, so it
would be able to provide valid APIC ID at hotplug time. But it looks fragile.

As Anthony once suggested, perhaps we could model sockets as links.
/machine/cpu[apic_id..]
So machine at level we would have a pre-defined topology with apic_id in link
name serving as initial apic_id selector pins in socket.

Pre-allocate them (sockets) in pc_cpus_init() for all possible CPUs and when
creating CPUs, apic_id property setter could accept link path and get
appropriate apic_id from link name and assign itself to a specified link.

It would be friendly with hot-plug and
-device X86CPU,id='/machine/cpu[apic_id]' cmdline if we ever decide in future
to create CPUs this way.

>      };
>      pc_init1(&pc_args);
>  }
Eduardo Habkost - Nov. 1, 2012, 3:16 p.m.
On Thu, Nov 01, 2012 at 03:46:08PM +0100, Igor Mammedov wrote:
> On Wed, 24 Oct 2012 15:50:01 -0200
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > This keeps compatibility on machine-types pc-1.2 and older, and prints a
> > warning in case the requested configuration won't get the correct
> > topology.
> > 
> > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> > ---
> > Changes v1 -> v2:
> >  - Move code to cpu.c
> >  - keep using cpu_index on *-user
> >  - Use SMP.contiguous_apic_ids global property
> >  - Prints warning in case the compatibility mode will expose incorrect
> >    topology
> > 
> > Changes v2 -> v3:
> >  - Now all code is inside hw/pc.c
> >  - Use a real "PC" class and a "contiguous_apic_ids" property
> > 
> > Changes v3 -> v4:
> >  - Instead of using a global property, use a separate machine init
> >    function and a PCInitArgs field, to implement compatibility mode
> >  - Use error_report() instead of fprintf(stderr) for the warning
> >  - Use a field on PCInitArgs instead of a static variable to check
> >    if warning was already printed
> > ---
> >  hw/pc.c      | 19 +++++++++++++++----
> >  hw/pc.h      |  6 ++++++
> >  hw/pc_piix.c |  1 +
> >  3 files changed, 22 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/pc.c b/hw/pc.c
> > index 8895087..3d08271 100644
> > --- a/hw/pc.c
> > +++ b/hw/pc.c
> > @@ -51,6 +51,8 @@
> >  #include "exec-memory.h"
> >  #include "arch_init.h"
> >  #include "bitmap.h"
> > +#include "topology.h"
> > +#include "cpus.h"
> >  
> >  /* debug PC/ISA interrupts */
> >  //#define DEBUG_IRQ
> > @@ -562,10 +564,19 @@ static void bochs_bios_write(void *opaque, uint32_t
> > addr, uint32_t val) */
> >  static uint32_t apic_id_for_cpu(PCInitArgs *args, int cpu_index)
> >  {
> > -    /* right now APIC ID == CPU index. this will eventually change to use
> > -     * the CPU topology configuration properly
> > -     */
> > -    return cpu_index;
> > +    uint32_t correct_id;
> > +
> > +    correct_id = topo_apicid_for_cpu(smp_cores, smp_threads, cpu_index);
> > +    if (args->compat_contiguous_apic_ids) {
> > +        if (cpu_index != correct_id && !args->apic_id_warned) {
> > +            error_report("APIC IDs set in compatibility mode, "
> > +                         "CPU topology won't match the configuration");
> > +            args->apic_id_warned = true;
> > +        }
> > +        return cpu_index;
> > +    } else {
> > +        return correct_id;
> > +    }
> >  }
> >  
> >  int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
> > diff --git a/hw/pc.h b/hw/pc.h
> > index 53883f5..a14944a 100644
> > --- a/hw/pc.h
> > +++ b/hw/pc.h
> > @@ -81,6 +81,12 @@ typedef struct PCInitArgs {
> >      QEMUMachineInitArgs *qemu_args;
> >      bool pci_enabled;
> >      bool kvmclock_enabled;
> > +    /* Enable compatibility (buggy) APIC ID generation, that keep APIC IDs
> > +     * contiguous
> > +     */
> > +    bool compat_contiguous_apic_ids;
> > +    /* Warning message about incorrect APIC ID was shown */
> > +    bool apic_id_warned;
> >  
> >      /* Memory regions & sizes: */
> >      MemoryRegion *system_memory;
> > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> > index 57a3228..79a54e6 100644
> > --- a/hw/pc_piix.c
> > +++ b/hw/pc_piix.c
> > @@ -298,6 +298,7 @@ static void pc_init_pci_v1_2(QEMUMachineInitArgs *args)
> >          .qemu_args = args,
> >          .pci_enabled = true,
> >          .kvmclock_enabled = true,
> > +        .compat_contiguous_apic_ids = true,
> I suppose libvirt/whatever would know that it's starting v1_2 machine, so it
> would be able to provide valid APIC ID at hotplug time. But it looks fragile.

Making the APIC IDs work properly while _not_ exposing them to the
outside is being hard enough, so I really wouldn't like to expose the
APIC IDs to the outside, and depend on libvirt setting APIC IDs
properly.

In either case, we need to make today's libvirt (that knows nothing
about APIC IDs) running qemu-1.2 to be able to live-migrate to a host
using the same libvirt verison, but running qemu-1.3. So it doesn't
matter how we model it internally or how we make the new interfaces look
like, I don't see how we could avoid having a compat_contiguous_apic_ids
flag somewhere, to make "qemu-1.3 -machine pc-1.2" keep the existing
behavior.

> 
> As Anthony once suggested, perhaps we could model sockets as links.
> /machine/cpu[apic_id..]
> So machine at level we would have a pre-defined topology with apic_id in link
> name serving as initial apic_id selector pins in socket.

Are you talking about having a socket/link for each VCPU (thread), or a
socket/link for each CPU package (that may contain multiple cores and
multiples threads).

I was expecting we would end up doing the latter, not the former. In
other words, that we would have:
a /machine/cpu[...] for each CPU socket/package,
a /machine/cpu[...]/core[...] for each core,
a /machine/cpu[...]/core[...]/thread[...] for each VCPU (that is
actually a CPU thread).

In that case, why not use the CPU socket/package index on the link name,
instead of the APIC ID?

Also, the actual CPU package object (containing multiple cores/threads)
will need to get more information from the parent object, not just the
APIC ID. It needs:
- The "base" APIC ID for the threads/cores inside the CPU;
- The number of bits it can use to identify its threads/cores
  (so it doesn't conflict with the APIC IDs used by other sockets).


> 
> Pre-allocate them (sockets) in pc_cpus_init() for all possible CPUs and when
> creating CPUs, apic_id property setter could accept link path and get
> appropriate apic_id from link name and assign itself to a specified link.

That could work. But I wouldn't like to make the modelling work (that is
really taking very long) to get into the way of fixing bugs.


> 
> It would be friendly with hot-plug and
> -device X86CPU,id='/machine/cpu[apic_id]' cmdline if we ever decide in future
> to create CPUs this way.

But if we make the interface based on CPU sockets/packages, not VCPU
threads, we wouldn't even need to expose the APIC IDs on the
externally-visibile -device link, right?


> 
> >      };
> >      pc_init1(&pc_args);
> >  }
>

Patch

diff --git a/hw/pc.c b/hw/pc.c
index 8895087..3d08271 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -51,6 +51,8 @@ 
 #include "exec-memory.h"
 #include "arch_init.h"
 #include "bitmap.h"
+#include "topology.h"
+#include "cpus.h"
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -562,10 +564,19 @@  static void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val)
  */
 static uint32_t apic_id_for_cpu(PCInitArgs *args, int cpu_index)
 {
-    /* right now APIC ID == CPU index. this will eventually change to use
-     * the CPU topology configuration properly
-     */
-    return cpu_index;
+    uint32_t correct_id;
+
+    correct_id = topo_apicid_for_cpu(smp_cores, smp_threads, cpu_index);
+    if (args->compat_contiguous_apic_ids) {
+        if (cpu_index != correct_id && !args->apic_id_warned) {
+            error_report("APIC IDs set in compatibility mode, "
+                         "CPU topology won't match the configuration");
+            args->apic_id_warned = true;
+        }
+        return cpu_index;
+    } else {
+        return correct_id;
+    }
 }
 
 int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
diff --git a/hw/pc.h b/hw/pc.h
index 53883f5..a14944a 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -81,6 +81,12 @@  typedef struct PCInitArgs {
     QEMUMachineInitArgs *qemu_args;
     bool pci_enabled;
     bool kvmclock_enabled;
+    /* Enable compatibility (buggy) APIC ID generation, that keep APIC IDs
+     * contiguous
+     */
+    bool compat_contiguous_apic_ids;
+    /* Warning message about incorrect APIC ID was shown */
+    bool apic_id_warned;
 
     /* Memory regions & sizes: */
     MemoryRegion *system_memory;
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 57a3228..79a54e6 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -298,6 +298,7 @@  static void pc_init_pci_v1_2(QEMUMachineInitArgs *args)
         .qemu_args = args,
         .pci_enabled = true,
         .kvmclock_enabled = true,
+        .compat_contiguous_apic_ids = true,
     };
     pc_init1(&pc_args);
 }