Patchwork [SeaBIOS,2/2] allow CPUs to have non-contiguous Local APIC IDs (v2)

login
register
mail settings
Submitter Eduardo Habkost
Date July 20, 2012, 5:04 p.m.
Message ID <1342803890-1795-3-git-send-email-ehabkost@redhat.com>
Download mbox | patch
Permalink /patch/172312/
State New
Headers show

Comments

Eduardo Habkost - July 20, 2012, 5:04 p.m.
Extract Local APIC IDs directly from the CPUs, and instead of check for
"i < CountCPUs", check if the APIC ID was present on boot, when building
ACPI tables and the MP-Table.

This keeps ACPI Processor ID == APIC ID, but allows the
hardware<->SeaBIOS interface be completely APIC-ID based and not depend
on any other kind of "CPU identifier". This way, SeaBIOS may change the
way ACPI Processor IDs are chosen in the future.

As currently SeaBIOS supports only xAPIC and not x2APIC, the list of
present-on-boot APIC IDs is a 256-bit bitmap. If one day SeaBIOS starts
to support x2APIC, the data structure used to enumerate the APIC IDs
will have to be changed (but this is an internal implementation detail,
not visible to the OS or on any hardware<=>SeaBIOS interface).

For current QEMU versions (that always make the APIC IDs contiguous),
the OS-visible behavior and resulting ACPI tables should be exactly the
same. This patch will simply allow QEMU to start setting non-contiguous
APIC IDs (that is a requirement for some sockets/cores/threads topology
settings).

Changes v1 -> v2:
 - Use size suffixes on all asm instructions on smp.c
 - New patch description

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
 src/acpi-dsdt.dsl |    4 +++-
 src/acpi.c        |    9 +++++----
 src/mptable.c     |    2 +-
 src/smp.c         |   17 +++++++++++++++++
 src/util.h        |    1 +
 5 files changed, 27 insertions(+), 6 deletions(-)
Gleb Natapov - July 23, 2012, 12:20 p.m.
On Fri, Jul 20, 2012 at 02:04:50PM -0300, Eduardo Habkost wrote:
> Extract Local APIC IDs directly from the CPUs, and instead of check for
> "i < CountCPUs", check if the APIC ID was present on boot, when building
> ACPI tables and the MP-Table.
> 
> This keeps ACPI Processor ID == APIC ID, but allows the
> hardware<->SeaBIOS interface be completely APIC-ID based and not depend
> on any other kind of "CPU identifier". This way, SeaBIOS may change the
> way ACPI Processor IDs are chosen in the future.
> 
> As currently SeaBIOS supports only xAPIC and not x2APIC, the list of
> present-on-boot APIC IDs is a 256-bit bitmap. If one day SeaBIOS starts
> to support x2APIC, the data structure used to enumerate the APIC IDs
> will have to be changed (but this is an internal implementation detail,
> not visible to the OS or on any hardware<=>SeaBIOS interface).
> 
> For current QEMU versions (that always make the APIC IDs contiguous),
> the OS-visible behavior and resulting ACPI tables should be exactly the
> same. This patch will simply allow QEMU to start setting non-contiguous
> APIC IDs (that is a requirement for some sockets/cores/threads topology
> settings).
> 
> Changes v1 -> v2:
>  - Use size suffixes on all asm instructions on smp.c
>  - New patch description
> 
> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> ---
>  src/acpi-dsdt.dsl |    4 +++-
>  src/acpi.c        |    9 +++++----
>  src/mptable.c     |    2 +-
>  src/smp.c         |   17 +++++++++++++++++
>  src/util.h        |    1 +
>  5 files changed, 27 insertions(+), 6 deletions(-)
> 
> diff --git a/src/acpi-dsdt.dsl b/src/acpi-dsdt.dsl
> index 2060686..72dc7d8 100644
> --- a/src/acpi-dsdt.dsl
> +++ b/src/acpi-dsdt.dsl
> @@ -676,6 +676,7 @@ DefinitionBlock (
>          /* Methods called by run-time generated SSDT Processor objects */
>          Method (CPMA, 1, NotSerialized) {
>              // _MAT method - create an madt apic buffer
> +            // Arg0 = Processor ID = Local APIC ID
>              // Local0 = CPON flag for this cpu
>              Store(DerefOf(Index(CPON, Arg0)), Local0)
>              // Local1 = Buffer (in madt apic form) to return
> @@ -688,6 +689,7 @@ DefinitionBlock (
>          }
>          Method (CPST, 1, NotSerialized) {
>              // _STA method - return ON status of cpu
> +            // Arg0 = Processor ID = Local APIC ID
>              // Local0 = CPON flag for this cpu
>              Store(DerefOf(Index(CPON, Arg0)), Local0)
>              If (Local0) { Return(0xF) } Else { Return(0x0) }
> @@ -708,7 +710,7 @@ DefinitionBlock (
>              Store (PRS, Local5)
>              // Local2 = last read byte from bitmap
>              Store (Zero, Local2)
> -            // Local0 = cpuid iterator
> +            // Local0 = Processor ID / APIC ID iterator
>              Store (Zero, Local0)
>              While (LLess(Local0, SizeOf(CPON))) {
>                  // Local1 = CPON flag for this cpu
> diff --git a/src/acpi.c b/src/acpi.c
> index da3bc57..39b7172 100644
> --- a/src/acpi.c
> +++ b/src/acpi.c
> @@ -327,7 +327,7 @@ build_madt(void)
>          apic->length = sizeof(*apic);
>          apic->processor_id = i;
>          apic->local_apic_id = i;
> -        if (i < CountCPUs)
> +        if (apic_id_is_present(apic->local_apic_id))
>              apic->flags = cpu_to_le32(1);
>          else
>              apic->flags = cpu_to_le32(0);
> @@ -445,6 +445,7 @@ build_ssdt(void)
>      }
>  
>      // build "Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...}"
> +    // Arg0 = Processor ID = APIC ID
>      *(ssdt_ptr++) = 0x14; // MethodOp
>      ssdt_ptr = encodeLen(ssdt_ptr, 2+5+(12*acpi_cpus), 2);
>      *(ssdt_ptr++) = 'N';
> @@ -477,7 +478,7 @@ build_ssdt(void)
>      ssdt_ptr = encodeLen(ssdt_ptr, 2+1+(1*acpi_cpus), 2);
>      *(ssdt_ptr++) = acpi_cpus;
>      for (i=0; i<acpi_cpus; i++)
> -        *(ssdt_ptr++) = (i < CountCPUs) ? 0x01 : 0x00;
> +        *(ssdt_ptr++) = (apic_id_is_present(i)) ? 0x01 : 0x00;
>  
>      // store pci io windows: start, end, length
>      // this way we don't have to do the math in the dsdt
> @@ -656,10 +657,10 @@ build_srat(void)
>          core->proximity_lo = curnode;
>          memset(core->proximity_hi, 0, 3);
>          core->local_sapic_eid = 0;
> -        if (i < CountCPUs)
> +        if (apic_id_is_present(i))
>              core->flags = cpu_to_le32(1);
>          else
> -            core->flags = 0;
> +            core->flags = cpu_to_le32(0);
>          core++;
>      }
>  
> diff --git a/src/mptable.c b/src/mptable.c
> index 103f462..9406f98 100644
> --- a/src/mptable.c
> +++ b/src/mptable.c
> @@ -59,7 +59,7 @@ mptable_init(void)
>          cpu->apicid = i;
>          cpu->apicver = apic_version;
>          /* cpu flags: enabled, bootstrap cpu */
> -        cpu->cpuflag = ((i<CountCPUs) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
> +        cpu->cpuflag = (apic_id_is_present(i) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
>          cpu->cpusignature = cpuid_signature;
>          cpu->featureflag = cpuid_features;
>          cpu++;
> diff --git a/src/smp.c b/src/smp.c
> index 8c077a1..3c36f8c 100644
> --- a/src/smp.c
> +++ b/src/smp.c
> @@ -36,6 +36,8 @@ wrmsr_smp(u32 index, u64 val)
>  
>  u32 CountCPUs VAR16VISIBLE;
>  u32 MaxCountCPUs VAR16VISIBLE;
> +// 256 bits for the found APIC IDs
> +u32 FoundAPICIDs[256/32] VAR16VISIBLE;
>  extern void smp_ap_boot_code(void);
>  ASM16(
>      "  .global smp_ap_boot_code\n"
> @@ -59,6 +61,12 @@ ASM16(
>      "  jmp 1b\n"
>      "2:\n"
>  
> +    // get apic ID on EBX, set bit on FoundAPICIDs
> +    "  movl $1, %eax\n"
> +    "  cpuid\n"
> +    "  shrl $24, %ebx\n"
> +    "  lock btsl %ebx, FoundAPICIDs\n"
> +
>      // Increment the cpu counter
>      "  lock incl CountCPUs\n"
You can get rid of CountCPUs by calculating FoundAPICIDs bitmap weight.

>  
> @@ -67,6 +75,11 @@ ASM16(
>      "  jmp 1b\n"
>      );
>  
> +int apic_id_is_present(u8 apic_id)
> +{
> +    return FoundAPICIDs[apic_id/32] & (1 << (apic_id % 32));
> +}
> +
>  // find and initialize the CPUs by launching a SIPI to them
>  void
>  smp_probe(void)
> @@ -82,6 +95,10 @@ smp_probe(void)
>          return;
>      }
>  
> +    // mark the BSP initial APIC ID as found, too:
> +    u8 apic_id = ebx>>24;
> +    FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32));
> +
>      // Init the counter.
>      writel(&CountCPUs, 1);
>  
> diff --git a/src/util.h b/src/util.h
> index ef8ec7c..89e928c 100644
> --- a/src/util.h
> +++ b/src/util.h
> @@ -355,6 +355,7 @@ extern u32 CountCPUs;
>  extern u32 MaxCountCPUs;
>  void wrmsr_smp(u32 index, u64 val);
>  void smp_probe(void);
> +int apic_id_is_present(u8 apic_id);
>  
>  // coreboot.c
>  extern const char *CBvendor, *CBpart;
> -- 
> 1.7.10.4

--
			Gleb.
Eduardo Habkost - July 25, 2012, 6:42 p.m.
On Mon, Jul 23, 2012 at 03:20:14PM +0300, Gleb Natapov wrote:
> On Fri, Jul 20, 2012 at 02:04:50PM -0300, Eduardo Habkost wrote:
> > Extract Local APIC IDs directly from the CPUs, and instead of check for
> > "i < CountCPUs", check if the APIC ID was present on boot, when building
> > ACPI tables and the MP-Table.
> > 
> > This keeps ACPI Processor ID == APIC ID, but allows the
> > hardware<->SeaBIOS interface be completely APIC-ID based and not depend
> > on any other kind of "CPU identifier". This way, SeaBIOS may change the
> > way ACPI Processor IDs are chosen in the future.
> > 
> > As currently SeaBIOS supports only xAPIC and not x2APIC, the list of
> > present-on-boot APIC IDs is a 256-bit bitmap. If one day SeaBIOS starts
> > to support x2APIC, the data structure used to enumerate the APIC IDs
> > will have to be changed (but this is an internal implementation detail,
> > not visible to the OS or on any hardware<=>SeaBIOS interface).
> > 
> > For current QEMU versions (that always make the APIC IDs contiguous),
> > the OS-visible behavior and resulting ACPI tables should be exactly the
> > same. This patch will simply allow QEMU to start setting non-contiguous
> > APIC IDs (that is a requirement for some sockets/cores/threads topology
> > settings).
> > 
> > Changes v1 -> v2:
> >  - Use size suffixes on all asm instructions on smp.c
> >  - New patch description
> > 
> > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> > ---
> >  src/acpi-dsdt.dsl |    4 +++-
> >  src/acpi.c        |    9 +++++----
> >  src/mptable.c     |    2 +-
> >  src/smp.c         |   17 +++++++++++++++++
> >  src/util.h        |    1 +
> >  5 files changed, 27 insertions(+), 6 deletions(-)
> > 
> > diff --git a/src/acpi-dsdt.dsl b/src/acpi-dsdt.dsl
> > index 2060686..72dc7d8 100644
> > --- a/src/acpi-dsdt.dsl
> > +++ b/src/acpi-dsdt.dsl
> > @@ -676,6 +676,7 @@ DefinitionBlock (
> >          /* Methods called by run-time generated SSDT Processor objects */
> >          Method (CPMA, 1, NotSerialized) {
> >              // _MAT method - create an madt apic buffer
> > +            // Arg0 = Processor ID = Local APIC ID
> >              // Local0 = CPON flag for this cpu
> >              Store(DerefOf(Index(CPON, Arg0)), Local0)
> >              // Local1 = Buffer (in madt apic form) to return
> > @@ -688,6 +689,7 @@ DefinitionBlock (
> >          }
> >          Method (CPST, 1, NotSerialized) {
> >              // _STA method - return ON status of cpu
> > +            // Arg0 = Processor ID = Local APIC ID
> >              // Local0 = CPON flag for this cpu
> >              Store(DerefOf(Index(CPON, Arg0)), Local0)
> >              If (Local0) { Return(0xF) } Else { Return(0x0) }
> > @@ -708,7 +710,7 @@ DefinitionBlock (
> >              Store (PRS, Local5)
> >              // Local2 = last read byte from bitmap
> >              Store (Zero, Local2)
> > -            // Local0 = cpuid iterator
> > +            // Local0 = Processor ID / APIC ID iterator
> >              Store (Zero, Local0)
> >              While (LLess(Local0, SizeOf(CPON))) {
> >                  // Local1 = CPON flag for this cpu
> > diff --git a/src/acpi.c b/src/acpi.c
> > index da3bc57..39b7172 100644
> > --- a/src/acpi.c
> > +++ b/src/acpi.c
> > @@ -327,7 +327,7 @@ build_madt(void)
> >          apic->length = sizeof(*apic);
> >          apic->processor_id = i;
> >          apic->local_apic_id = i;
> > -        if (i < CountCPUs)
> > +        if (apic_id_is_present(apic->local_apic_id))
> >              apic->flags = cpu_to_le32(1);
> >          else
> >              apic->flags = cpu_to_le32(0);
> > @@ -445,6 +445,7 @@ build_ssdt(void)
> >      }
> >  
> >      // build "Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...}"
> > +    // Arg0 = Processor ID = APIC ID
> >      *(ssdt_ptr++) = 0x14; // MethodOp
> >      ssdt_ptr = encodeLen(ssdt_ptr, 2+5+(12*acpi_cpus), 2);
> >      *(ssdt_ptr++) = 'N';
> > @@ -477,7 +478,7 @@ build_ssdt(void)
> >      ssdt_ptr = encodeLen(ssdt_ptr, 2+1+(1*acpi_cpus), 2);
> >      *(ssdt_ptr++) = acpi_cpus;
> >      for (i=0; i<acpi_cpus; i++)
> > -        *(ssdt_ptr++) = (i < CountCPUs) ? 0x01 : 0x00;
> > +        *(ssdt_ptr++) = (apic_id_is_present(i)) ? 0x01 : 0x00;
> >  
> >      // store pci io windows: start, end, length
> >      // this way we don't have to do the math in the dsdt
> > @@ -656,10 +657,10 @@ build_srat(void)
> >          core->proximity_lo = curnode;
> >          memset(core->proximity_hi, 0, 3);
> >          core->local_sapic_eid = 0;
> > -        if (i < CountCPUs)
> > +        if (apic_id_is_present(i))
> >              core->flags = cpu_to_le32(1);
> >          else
> > -            core->flags = 0;
> > +            core->flags = cpu_to_le32(0);
> >          core++;
> >      }
> >  
> > diff --git a/src/mptable.c b/src/mptable.c
> > index 103f462..9406f98 100644
> > --- a/src/mptable.c
> > +++ b/src/mptable.c
> > @@ -59,7 +59,7 @@ mptable_init(void)
> >          cpu->apicid = i;
> >          cpu->apicver = apic_version;
> >          /* cpu flags: enabled, bootstrap cpu */
> > -        cpu->cpuflag = ((i<CountCPUs) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
> > +        cpu->cpuflag = (apic_id_is_present(i) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
> >          cpu->cpusignature = cpuid_signature;
> >          cpu->featureflag = cpuid_features;
> >          cpu++;
> > diff --git a/src/smp.c b/src/smp.c
> > index 8c077a1..3c36f8c 100644
> > --- a/src/smp.c
> > +++ b/src/smp.c
> > @@ -36,6 +36,8 @@ wrmsr_smp(u32 index, u64 val)
> >  
> >  u32 CountCPUs VAR16VISIBLE;
> >  u32 MaxCountCPUs VAR16VISIBLE;
> > +// 256 bits for the found APIC IDs
> > +u32 FoundAPICIDs[256/32] VAR16VISIBLE;
> >  extern void smp_ap_boot_code(void);
> >  ASM16(
> >      "  .global smp_ap_boot_code\n"
> > @@ -59,6 +61,12 @@ ASM16(
> >      "  jmp 1b\n"
> >      "2:\n"
> >  
> > +    // get apic ID on EBX, set bit on FoundAPICIDs
> > +    "  movl $1, %eax\n"
> > +    "  cpuid\n"
> > +    "  shrl $24, %ebx\n"
> > +    "  lock btsl %ebx, FoundAPICIDs\n"
> > +
> >      // Increment the cpu counter
> >      "  lock incl CountCPUs\n"
> You can get rid of CountCPUs by calculating FoundAPICIDs bitmap weight.

I was going to do that after you suggested, but then I saw this:

        while (cmos_smp_count + 1 != readl(&CountCPUs))
            yield();

It's possible to replace the atomic read of CountCPUs with the bitmap weight
calculation on the loop, but: is it really worth it?

> 
> >  
> > @@ -67,6 +75,11 @@ ASM16(
> >      "  jmp 1b\n"
> >      );
> >  
> > +int apic_id_is_present(u8 apic_id)
> > +{
> > +    return FoundAPICIDs[apic_id/32] & (1 << (apic_id % 32));
> > +}
> > +
> >  // find and initialize the CPUs by launching a SIPI to them
> >  void
> >  smp_probe(void)
> > @@ -82,6 +95,10 @@ smp_probe(void)
> >          return;
> >      }
> >  
> > +    // mark the BSP initial APIC ID as found, too:
> > +    u8 apic_id = ebx>>24;
> > +    FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32));
> > +
> >      // Init the counter.
> >      writel(&CountCPUs, 1);
> >  
> > diff --git a/src/util.h b/src/util.h
> > index ef8ec7c..89e928c 100644
> > --- a/src/util.h
> > +++ b/src/util.h
> > @@ -355,6 +355,7 @@ extern u32 CountCPUs;
> >  extern u32 MaxCountCPUs;
> >  void wrmsr_smp(u32 index, u64 val);
> >  void smp_probe(void);
> > +int apic_id_is_present(u8 apic_id);
> >  
> >  // coreboot.c
> >  extern const char *CBvendor, *CBpart;
> > -- 
> > 1.7.10.4
> 
> --
> 			Gleb.
>
Gleb Natapov - July 26, 2012, 6:08 a.m.
On Wed, Jul 25, 2012 at 03:42:21PM -0300, Eduardo Habkost wrote:
> On Mon, Jul 23, 2012 at 03:20:14PM +0300, Gleb Natapov wrote:
> > On Fri, Jul 20, 2012 at 02:04:50PM -0300, Eduardo Habkost wrote:
> > > Extract Local APIC IDs directly from the CPUs, and instead of check for
> > > "i < CountCPUs", check if the APIC ID was present on boot, when building
> > > ACPI tables and the MP-Table.
> > > 
> > > This keeps ACPI Processor ID == APIC ID, but allows the
> > > hardware<->SeaBIOS interface be completely APIC-ID based and not depend
> > > on any other kind of "CPU identifier". This way, SeaBIOS may change the
> > > way ACPI Processor IDs are chosen in the future.
> > > 
> > > As currently SeaBIOS supports only xAPIC and not x2APIC, the list of
> > > present-on-boot APIC IDs is a 256-bit bitmap. If one day SeaBIOS starts
> > > to support x2APIC, the data structure used to enumerate the APIC IDs
> > > will have to be changed (but this is an internal implementation detail,
> > > not visible to the OS or on any hardware<=>SeaBIOS interface).
> > > 
> > > For current QEMU versions (that always make the APIC IDs contiguous),
> > > the OS-visible behavior and resulting ACPI tables should be exactly the
> > > same. This patch will simply allow QEMU to start setting non-contiguous
> > > APIC IDs (that is a requirement for some sockets/cores/threads topology
> > > settings).
> > > 
> > > Changes v1 -> v2:
> > >  - Use size suffixes on all asm instructions on smp.c
> > >  - New patch description
> > > 
> > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> > > ---
> > >  src/acpi-dsdt.dsl |    4 +++-
> > >  src/acpi.c        |    9 +++++----
> > >  src/mptable.c     |    2 +-
> > >  src/smp.c         |   17 +++++++++++++++++
> > >  src/util.h        |    1 +
> > >  5 files changed, 27 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/src/acpi-dsdt.dsl b/src/acpi-dsdt.dsl
> > > index 2060686..72dc7d8 100644
> > > --- a/src/acpi-dsdt.dsl
> > > +++ b/src/acpi-dsdt.dsl
> > > @@ -676,6 +676,7 @@ DefinitionBlock (
> > >          /* Methods called by run-time generated SSDT Processor objects */
> > >          Method (CPMA, 1, NotSerialized) {
> > >              // _MAT method - create an madt apic buffer
> > > +            // Arg0 = Processor ID = Local APIC ID
> > >              // Local0 = CPON flag for this cpu
> > >              Store(DerefOf(Index(CPON, Arg0)), Local0)
> > >              // Local1 = Buffer (in madt apic form) to return
> > > @@ -688,6 +689,7 @@ DefinitionBlock (
> > >          }
> > >          Method (CPST, 1, NotSerialized) {
> > >              // _STA method - return ON status of cpu
> > > +            // Arg0 = Processor ID = Local APIC ID
> > >              // Local0 = CPON flag for this cpu
> > >              Store(DerefOf(Index(CPON, Arg0)), Local0)
> > >              If (Local0) { Return(0xF) } Else { Return(0x0) }
> > > @@ -708,7 +710,7 @@ DefinitionBlock (
> > >              Store (PRS, Local5)
> > >              // Local2 = last read byte from bitmap
> > >              Store (Zero, Local2)
> > > -            // Local0 = cpuid iterator
> > > +            // Local0 = Processor ID / APIC ID iterator
> > >              Store (Zero, Local0)
> > >              While (LLess(Local0, SizeOf(CPON))) {
> > >                  // Local1 = CPON flag for this cpu
> > > diff --git a/src/acpi.c b/src/acpi.c
> > > index da3bc57..39b7172 100644
> > > --- a/src/acpi.c
> > > +++ b/src/acpi.c
> > > @@ -327,7 +327,7 @@ build_madt(void)
> > >          apic->length = sizeof(*apic);
> > >          apic->processor_id = i;
> > >          apic->local_apic_id = i;
> > > -        if (i < CountCPUs)
> > > +        if (apic_id_is_present(apic->local_apic_id))
> > >              apic->flags = cpu_to_le32(1);
> > >          else
> > >              apic->flags = cpu_to_le32(0);
> > > @@ -445,6 +445,7 @@ build_ssdt(void)
> > >      }
> > >  
> > >      // build "Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...}"
> > > +    // Arg0 = Processor ID = APIC ID
> > >      *(ssdt_ptr++) = 0x14; // MethodOp
> > >      ssdt_ptr = encodeLen(ssdt_ptr, 2+5+(12*acpi_cpus), 2);
> > >      *(ssdt_ptr++) = 'N';
> > > @@ -477,7 +478,7 @@ build_ssdt(void)
> > >      ssdt_ptr = encodeLen(ssdt_ptr, 2+1+(1*acpi_cpus), 2);
> > >      *(ssdt_ptr++) = acpi_cpus;
> > >      for (i=0; i<acpi_cpus; i++)
> > > -        *(ssdt_ptr++) = (i < CountCPUs) ? 0x01 : 0x00;
> > > +        *(ssdt_ptr++) = (apic_id_is_present(i)) ? 0x01 : 0x00;
> > >  
> > >      // store pci io windows: start, end, length
> > >      // this way we don't have to do the math in the dsdt
> > > @@ -656,10 +657,10 @@ build_srat(void)
> > >          core->proximity_lo = curnode;
> > >          memset(core->proximity_hi, 0, 3);
> > >          core->local_sapic_eid = 0;
> > > -        if (i < CountCPUs)
> > > +        if (apic_id_is_present(i))
> > >              core->flags = cpu_to_le32(1);
> > >          else
> > > -            core->flags = 0;
> > > +            core->flags = cpu_to_le32(0);
> > >          core++;
> > >      }
> > >  
> > > diff --git a/src/mptable.c b/src/mptable.c
> > > index 103f462..9406f98 100644
> > > --- a/src/mptable.c
> > > +++ b/src/mptable.c
> > > @@ -59,7 +59,7 @@ mptable_init(void)
> > >          cpu->apicid = i;
> > >          cpu->apicver = apic_version;
> > >          /* cpu flags: enabled, bootstrap cpu */
> > > -        cpu->cpuflag = ((i<CountCPUs) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
> > > +        cpu->cpuflag = (apic_id_is_present(i) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
> > >          cpu->cpusignature = cpuid_signature;
> > >          cpu->featureflag = cpuid_features;
> > >          cpu++;
> > > diff --git a/src/smp.c b/src/smp.c
> > > index 8c077a1..3c36f8c 100644
> > > --- a/src/smp.c
> > > +++ b/src/smp.c
> > > @@ -36,6 +36,8 @@ wrmsr_smp(u32 index, u64 val)
> > >  
> > >  u32 CountCPUs VAR16VISIBLE;
> > >  u32 MaxCountCPUs VAR16VISIBLE;
> > > +// 256 bits for the found APIC IDs
> > > +u32 FoundAPICIDs[256/32] VAR16VISIBLE;
> > >  extern void smp_ap_boot_code(void);
> > >  ASM16(
> > >      "  .global smp_ap_boot_code\n"
> > > @@ -59,6 +61,12 @@ ASM16(
> > >      "  jmp 1b\n"
> > >      "2:\n"
> > >  
> > > +    // get apic ID on EBX, set bit on FoundAPICIDs
> > > +    "  movl $1, %eax\n"
> > > +    "  cpuid\n"
> > > +    "  shrl $24, %ebx\n"
> > > +    "  lock btsl %ebx, FoundAPICIDs\n"
> > > +
> > >      // Increment the cpu counter
> > >      "  lock incl CountCPUs\n"
> > You can get rid of CountCPUs by calculating FoundAPICIDs bitmap weight.
> 
> I was going to do that after you suggested, but then I saw this:
> 
>         while (cmos_smp_count + 1 != readl(&CountCPUs))
>             yield();
> 
> It's possible to replace the atomic read of CountCPUs with the bitmap weight
> calculation on the loop, but: is it really worth it?
> 
Why not? This eliminates one more global state.

--
			Gleb.
Eduardo Habkost - July 26, 2012, 2:16 p.m.
On Thu, Jul 26, 2012 at 09:08:40AM +0300, Gleb Natapov wrote:
> On Wed, Jul 25, 2012 at 03:42:21PM -0300, Eduardo Habkost wrote:
> > On Mon, Jul 23, 2012 at 03:20:14PM +0300, Gleb Natapov wrote:
> > > On Fri, Jul 20, 2012 at 02:04:50PM -0300, Eduardo Habkost wrote:
> > > > Extract Local APIC IDs directly from the CPUs, and instead of check for
> > > > "i < CountCPUs", check if the APIC ID was present on boot, when building
> > > > ACPI tables and the MP-Table.
> > > > 
> > > > This keeps ACPI Processor ID == APIC ID, but allows the
> > > > hardware<->SeaBIOS interface be completely APIC-ID based and not depend
> > > > on any other kind of "CPU identifier". This way, SeaBIOS may change the
> > > > way ACPI Processor IDs are chosen in the future.
> > > > 
> > > > As currently SeaBIOS supports only xAPIC and not x2APIC, the list of
> > > > present-on-boot APIC IDs is a 256-bit bitmap. If one day SeaBIOS starts
> > > > to support x2APIC, the data structure used to enumerate the APIC IDs
> > > > will have to be changed (but this is an internal implementation detail,
> > > > not visible to the OS or on any hardware<=>SeaBIOS interface).
> > > > 
> > > > For current QEMU versions (that always make the APIC IDs contiguous),
> > > > the OS-visible behavior and resulting ACPI tables should be exactly the
> > > > same. This patch will simply allow QEMU to start setting non-contiguous
> > > > APIC IDs (that is a requirement for some sockets/cores/threads topology
> > > > settings).
> > > > 
> > > > Changes v1 -> v2:
> > > >  - Use size suffixes on all asm instructions on smp.c
> > > >  - New patch description
> > > > 
> > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> > > > ---
> > > >  src/acpi-dsdt.dsl |    4 +++-
> > > >  src/acpi.c        |    9 +++++----
> > > >  src/mptable.c     |    2 +-
> > > >  src/smp.c         |   17 +++++++++++++++++
> > > >  src/util.h        |    1 +
> > > >  5 files changed, 27 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/src/acpi-dsdt.dsl b/src/acpi-dsdt.dsl
> > > > index 2060686..72dc7d8 100644
> > > > --- a/src/acpi-dsdt.dsl
> > > > +++ b/src/acpi-dsdt.dsl
> > > > @@ -676,6 +676,7 @@ DefinitionBlock (
> > > >          /* Methods called by run-time generated SSDT Processor objects */
> > > >          Method (CPMA, 1, NotSerialized) {
> > > >              // _MAT method - create an madt apic buffer
> > > > +            // Arg0 = Processor ID = Local APIC ID
> > > >              // Local0 = CPON flag for this cpu
> > > >              Store(DerefOf(Index(CPON, Arg0)), Local0)
> > > >              // Local1 = Buffer (in madt apic form) to return
> > > > @@ -688,6 +689,7 @@ DefinitionBlock (
> > > >          }
> > > >          Method (CPST, 1, NotSerialized) {
> > > >              // _STA method - return ON status of cpu
> > > > +            // Arg0 = Processor ID = Local APIC ID
> > > >              // Local0 = CPON flag for this cpu
> > > >              Store(DerefOf(Index(CPON, Arg0)), Local0)
> > > >              If (Local0) { Return(0xF) } Else { Return(0x0) }
> > > > @@ -708,7 +710,7 @@ DefinitionBlock (
> > > >              Store (PRS, Local5)
> > > >              // Local2 = last read byte from bitmap
> > > >              Store (Zero, Local2)
> > > > -            // Local0 = cpuid iterator
> > > > +            // Local0 = Processor ID / APIC ID iterator
> > > >              Store (Zero, Local0)
> > > >              While (LLess(Local0, SizeOf(CPON))) {
> > > >                  // Local1 = CPON flag for this cpu
> > > > diff --git a/src/acpi.c b/src/acpi.c
> > > > index da3bc57..39b7172 100644
> > > > --- a/src/acpi.c
> > > > +++ b/src/acpi.c
> > > > @@ -327,7 +327,7 @@ build_madt(void)
> > > >          apic->length = sizeof(*apic);
> > > >          apic->processor_id = i;
> > > >          apic->local_apic_id = i;
> > > > -        if (i < CountCPUs)
> > > > +        if (apic_id_is_present(apic->local_apic_id))
> > > >              apic->flags = cpu_to_le32(1);
> > > >          else
> > > >              apic->flags = cpu_to_le32(0);
> > > > @@ -445,6 +445,7 @@ build_ssdt(void)
> > > >      }
> > > >  
> > > >      // build "Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...}"
> > > > +    // Arg0 = Processor ID = APIC ID
> > > >      *(ssdt_ptr++) = 0x14; // MethodOp
> > > >      ssdt_ptr = encodeLen(ssdt_ptr, 2+5+(12*acpi_cpus), 2);
> > > >      *(ssdt_ptr++) = 'N';
> > > > @@ -477,7 +478,7 @@ build_ssdt(void)
> > > >      ssdt_ptr = encodeLen(ssdt_ptr, 2+1+(1*acpi_cpus), 2);
> > > >      *(ssdt_ptr++) = acpi_cpus;
> > > >      for (i=0; i<acpi_cpus; i++)
> > > > -        *(ssdt_ptr++) = (i < CountCPUs) ? 0x01 : 0x00;
> > > > +        *(ssdt_ptr++) = (apic_id_is_present(i)) ? 0x01 : 0x00;
> > > >  
> > > >      // store pci io windows: start, end, length
> > > >      // this way we don't have to do the math in the dsdt
> > > > @@ -656,10 +657,10 @@ build_srat(void)
> > > >          core->proximity_lo = curnode;
> > > >          memset(core->proximity_hi, 0, 3);
> > > >          core->local_sapic_eid = 0;
> > > > -        if (i < CountCPUs)
> > > > +        if (apic_id_is_present(i))
> > > >              core->flags = cpu_to_le32(1);
> > > >          else
> > > > -            core->flags = 0;
> > > > +            core->flags = cpu_to_le32(0);
> > > >          core++;
> > > >      }
> > > >  
> > > > diff --git a/src/mptable.c b/src/mptable.c
> > > > index 103f462..9406f98 100644
> > > > --- a/src/mptable.c
> > > > +++ b/src/mptable.c
> > > > @@ -59,7 +59,7 @@ mptable_init(void)
> > > >          cpu->apicid = i;
> > > >          cpu->apicver = apic_version;
> > > >          /* cpu flags: enabled, bootstrap cpu */
> > > > -        cpu->cpuflag = ((i<CountCPUs) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
> > > > +        cpu->cpuflag = (apic_id_is_present(i) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
> > > >          cpu->cpusignature = cpuid_signature;
> > > >          cpu->featureflag = cpuid_features;
> > > >          cpu++;
> > > > diff --git a/src/smp.c b/src/smp.c
> > > > index 8c077a1..3c36f8c 100644
> > > > --- a/src/smp.c
> > > > +++ b/src/smp.c
> > > > @@ -36,6 +36,8 @@ wrmsr_smp(u32 index, u64 val)
> > > >  
> > > >  u32 CountCPUs VAR16VISIBLE;
> > > >  u32 MaxCountCPUs VAR16VISIBLE;
> > > > +// 256 bits for the found APIC IDs
> > > > +u32 FoundAPICIDs[256/32] VAR16VISIBLE;
> > > >  extern void smp_ap_boot_code(void);
> > > >  ASM16(
> > > >      "  .global smp_ap_boot_code\n"
> > > > @@ -59,6 +61,12 @@ ASM16(
> > > >      "  jmp 1b\n"
> > > >      "2:\n"
> > > >  
> > > > +    // get apic ID on EBX, set bit on FoundAPICIDs
> > > > +    "  movl $1, %eax\n"
> > > > +    "  cpuid\n"
> > > > +    "  shrl $24, %ebx\n"
> > > > +    "  lock btsl %ebx, FoundAPICIDs\n"
> > > > +
> > > >      // Increment the cpu counter
> > > >      "  lock incl CountCPUs\n"
> > > You can get rid of CountCPUs by calculating FoundAPICIDs bitmap weight.
> > 
> > I was going to do that after you suggested, but then I saw this:
> > 
> >         while (cmos_smp_count + 1 != readl(&CountCPUs))
> >             yield();
> > 
> > It's possible to replace the atomic read of CountCPUs with the bitmap weight
> > calculation on the loop, but: is it really worth it?
> > 
> Why not? This eliminates one more global state.

Maybe we can simply make it stop being global and be used only by the
smp.c initialization code?

Even if the variable didn't exist yet, I think I would add it myself:
it's simpler and more efficient to calculate the bitmap weight once,
while filling the bitmap, than recalculating it every time on the
while(cmos_smp_count) loop.
Avi Kivity - July 26, 2012, 2:55 p.m.
On 07/26/2012 05:16 PM, Eduardo Habkost wrote:
>> > 
>> > It's possible to replace the atomic read of CountCPUs with the bitmap weight
>> > calculation on the loop, but: is it really worth it?
>> > 
>> Why not? This eliminates one more global state.
> 
> Maybe we can simply make it stop being global and be used only by the
> smp.c initialization code?
> 
> Even if the variable didn't exist yet, I think I would add it myself:
> it's simpler and more efficient to calculate the bitmap weight once,
> while filling the bitmap, than recalculating it every time on the
> while(cmos_smp_count) loop.

So you're spinning more efficiently?
Eduardo Habkost - July 26, 2012, 3:07 p.m.
On Thu, Jul 26, 2012 at 05:55:09PM +0300, Avi Kivity wrote:
> On 07/26/2012 05:16 PM, Eduardo Habkost wrote:
> >> > 
> >> > It's possible to replace the atomic read of CountCPUs with the bitmap weight
> >> > calculation on the loop, but: is it really worth it?
> >> > 
> >> Why not? This eliminates one more global state.
> > 
> > Maybe we can simply make it stop being global and be used only by the
> > smp.c initialization code?
> > 
> > Even if the variable didn't exist yet, I think I would add it myself:
> > it's simpler and more efficient to calculate the bitmap weight once,
> > while filling the bitmap, than recalculating it every time on the
> > while(cmos_smp_count) loop.
> 
> So you're spinning more efficiently?

No, coding more efficiently. I'm lazy. :-)

I don't want to risk breaking that part of the code to save 2 bytes of
memory. It's possible to impement it reliably, yes, but it's also very
easy to introduce a subtle bug, so why touch something that works
perfectly just to save 2 bytes?
Avi Kivity - July 26, 2012, 4:08 p.m.
On 07/26/2012 06:07 PM, Eduardo Habkost wrote:
> On Thu, Jul 26, 2012 at 05:55:09PM +0300, Avi Kivity wrote:
>> On 07/26/2012 05:16 PM, Eduardo Habkost wrote:
>> >> > 
>> >> > It's possible to replace the atomic read of CountCPUs with the bitmap weight
>> >> > calculation on the loop, but: is it really worth it?
>> >> > 
>> >> Why not? This eliminates one more global state.
>> > 
>> > Maybe we can simply make it stop being global and be used only by the
>> > smp.c initialization code?
>> > 
>> > Even if the variable didn't exist yet, I think I would add it myself:
>> > it's simpler and more efficient to calculate the bitmap weight once,
>> > while filling the bitmap, than recalculating it every time on the
>> > while(cmos_smp_count) loop.
>> 
>> So you're spinning more efficiently?
> 
> No, coding more efficiently. I'm lazy. :-)
> 
> I don't want to risk breaking that part of the code to save 2 bytes of
> memory. It's possible to impement it reliably, yes, but it's also very
> easy to introduce a subtle bug, so why touch something that works
> perfectly just to save 2 bytes?

Like gleb, I prefer avoiding derived state which can get out-of-sync.
Eduardo Habkost - July 26, 2012, 5:18 p.m.
On Thu, Jul 26, 2012 at 07:08:19PM +0300, Avi Kivity wrote:
> On 07/26/2012 06:07 PM, Eduardo Habkost wrote:
> > On Thu, Jul 26, 2012 at 05:55:09PM +0300, Avi Kivity wrote:
> >> On 07/26/2012 05:16 PM, Eduardo Habkost wrote:
> >> >> > 
> >> >> > It's possible to replace the atomic read of CountCPUs with the bitmap weight
> >> >> > calculation on the loop, but: is it really worth it?
> >> >> > 
> >> >> Why not? This eliminates one more global state.
> >> > 
> >> > Maybe we can simply make it stop being global and be used only by the
> >> > smp.c initialization code?
> >> > 
> >> > Even if the variable didn't exist yet, I think I would add it myself:
> >> > it's simpler and more efficient to calculate the bitmap weight once,
> >> > while filling the bitmap, than recalculating it every time on the
> >> > while(cmos_smp_count) loop.
> >> 
> >> So you're spinning more efficiently?
> > 
> > No, coding more efficiently. I'm lazy. :-)
> > 
> > I don't want to risk breaking that part of the code to save 2 bytes of
> > memory. It's possible to impement it reliably, yes, but it's also very
> > easy to introduce a subtle bug, so why touch something that works
> > perfectly just to save 2 bytes?
> 
> Like gleb, I prefer avoiding derived state which can get out-of-sync.

It is calculated only once on boot, and only used inside smp_probe() and
nowhere else (after applying this series, I mean). Personally I am more
afraid of subtle races between the bit-setting and the (non-atomic)
bitmap weight calculation + loop, than getting CountCPUs out of sync
while smp_probe() runs.

Note that I am all for making it not a global variable anymore, and use
it only inside smp_probe() and nowhere else (this series does that, the
only thing missing is to remove it from util.h). If anybody wants to
eliminate it from smp_probe() too, be my guest. :)

(I am hoping this is just a suggestion of an additional improvement, not
an issue that would block this patch from inclusion)

Patch

diff --git a/src/acpi-dsdt.dsl b/src/acpi-dsdt.dsl
index 2060686..72dc7d8 100644
--- a/src/acpi-dsdt.dsl
+++ b/src/acpi-dsdt.dsl
@@ -676,6 +676,7 @@  DefinitionBlock (
         /* Methods called by run-time generated SSDT Processor objects */
         Method (CPMA, 1, NotSerialized) {
             // _MAT method - create an madt apic buffer
+            // Arg0 = Processor ID = Local APIC ID
             // Local0 = CPON flag for this cpu
             Store(DerefOf(Index(CPON, Arg0)), Local0)
             // Local1 = Buffer (in madt apic form) to return
@@ -688,6 +689,7 @@  DefinitionBlock (
         }
         Method (CPST, 1, NotSerialized) {
             // _STA method - return ON status of cpu
+            // Arg0 = Processor ID = Local APIC ID
             // Local0 = CPON flag for this cpu
             Store(DerefOf(Index(CPON, Arg0)), Local0)
             If (Local0) { Return(0xF) } Else { Return(0x0) }
@@ -708,7 +710,7 @@  DefinitionBlock (
             Store (PRS, Local5)
             // Local2 = last read byte from bitmap
             Store (Zero, Local2)
-            // Local0 = cpuid iterator
+            // Local0 = Processor ID / APIC ID iterator
             Store (Zero, Local0)
             While (LLess(Local0, SizeOf(CPON))) {
                 // Local1 = CPON flag for this cpu
diff --git a/src/acpi.c b/src/acpi.c
index da3bc57..39b7172 100644
--- a/src/acpi.c
+++ b/src/acpi.c
@@ -327,7 +327,7 @@  build_madt(void)
         apic->length = sizeof(*apic);
         apic->processor_id = i;
         apic->local_apic_id = i;
-        if (i < CountCPUs)
+        if (apic_id_is_present(apic->local_apic_id))
             apic->flags = cpu_to_le32(1);
         else
             apic->flags = cpu_to_le32(0);
@@ -445,6 +445,7 @@  build_ssdt(void)
     }
 
     // build "Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...}"
+    // Arg0 = Processor ID = APIC ID
     *(ssdt_ptr++) = 0x14; // MethodOp
     ssdt_ptr = encodeLen(ssdt_ptr, 2+5+(12*acpi_cpus), 2);
     *(ssdt_ptr++) = 'N';
@@ -477,7 +478,7 @@  build_ssdt(void)
     ssdt_ptr = encodeLen(ssdt_ptr, 2+1+(1*acpi_cpus), 2);
     *(ssdt_ptr++) = acpi_cpus;
     for (i=0; i<acpi_cpus; i++)
-        *(ssdt_ptr++) = (i < CountCPUs) ? 0x01 : 0x00;
+        *(ssdt_ptr++) = (apic_id_is_present(i)) ? 0x01 : 0x00;
 
     // store pci io windows: start, end, length
     // this way we don't have to do the math in the dsdt
@@ -656,10 +657,10 @@  build_srat(void)
         core->proximity_lo = curnode;
         memset(core->proximity_hi, 0, 3);
         core->local_sapic_eid = 0;
-        if (i < CountCPUs)
+        if (apic_id_is_present(i))
             core->flags = cpu_to_le32(1);
         else
-            core->flags = 0;
+            core->flags = cpu_to_le32(0);
         core++;
     }
 
diff --git a/src/mptable.c b/src/mptable.c
index 103f462..9406f98 100644
--- a/src/mptable.c
+++ b/src/mptable.c
@@ -59,7 +59,7 @@  mptable_init(void)
         cpu->apicid = i;
         cpu->apicver = apic_version;
         /* cpu flags: enabled, bootstrap cpu */
-        cpu->cpuflag = ((i<CountCPUs) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
+        cpu->cpuflag = (apic_id_is_present(i) ? 0x01 : 0x00) | ((i==0) ? 0x02 : 0x00);
         cpu->cpusignature = cpuid_signature;
         cpu->featureflag = cpuid_features;
         cpu++;
diff --git a/src/smp.c b/src/smp.c
index 8c077a1..3c36f8c 100644
--- a/src/smp.c
+++ b/src/smp.c
@@ -36,6 +36,8 @@  wrmsr_smp(u32 index, u64 val)
 
 u32 CountCPUs VAR16VISIBLE;
 u32 MaxCountCPUs VAR16VISIBLE;
+// 256 bits for the found APIC IDs
+u32 FoundAPICIDs[256/32] VAR16VISIBLE;
 extern void smp_ap_boot_code(void);
 ASM16(
     "  .global smp_ap_boot_code\n"
@@ -59,6 +61,12 @@  ASM16(
     "  jmp 1b\n"
     "2:\n"
 
+    // get apic ID on EBX, set bit on FoundAPICIDs
+    "  movl $1, %eax\n"
+    "  cpuid\n"
+    "  shrl $24, %ebx\n"
+    "  lock btsl %ebx, FoundAPICIDs\n"
+
     // Increment the cpu counter
     "  lock incl CountCPUs\n"
 
@@ -67,6 +75,11 @@  ASM16(
     "  jmp 1b\n"
     );
 
+int apic_id_is_present(u8 apic_id)
+{
+    return FoundAPICIDs[apic_id/32] & (1 << (apic_id % 32));
+}
+
 // find and initialize the CPUs by launching a SIPI to them
 void
 smp_probe(void)
@@ -82,6 +95,10 @@  smp_probe(void)
         return;
     }
 
+    // mark the BSP initial APIC ID as found, too:
+    u8 apic_id = ebx>>24;
+    FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32));
+
     // Init the counter.
     writel(&CountCPUs, 1);
 
diff --git a/src/util.h b/src/util.h
index ef8ec7c..89e928c 100644
--- a/src/util.h
+++ b/src/util.h
@@ -355,6 +355,7 @@  extern u32 CountCPUs;
 extern u32 MaxCountCPUs;
 void wrmsr_smp(u32 index, u64 val);
 void smp_probe(void);
+int apic_id_is_present(u8 apic_id);
 
 // coreboot.c
 extern const char *CBvendor, *CBpart;