
[RFC,v2,5/6] hw/arm/virt-acpi-build: Add PPTT table

Message ID 20210413080745.33004-6-wangyanan55@huawei.com
State New
Series hw/arm/virt: Introduce cpu topology support

Commit Message

wangyanan (Y) April 13, 2021, 8:07 a.m. UTC
Add the Processor Properties Topology Table (PPTT) to present
CPU topology information to ACPI guests. Note, while a DT boot
Linux guest with a non-flat CPU topology will see socket and
core IDs being sequential integers starting from zero, e.g.
with -smp 4,sockets=2,cores=2,threads=1

a DT boot produces

 cpu:  0 package_id:  0 core_id:  0
 cpu:  1 package_id:  0 core_id:  1
 cpu:  2 package_id:  1 core_id:  0
 cpu:  3 package_id:  1 core_id:  1

an ACPI boot produces

 cpu:  0 package_id: 36 core_id:  0
 cpu:  1 package_id: 36 core_id:  1
 cpu:  2 package_id: 96 core_id:  2
 cpu:  3 package_id: 96 core_id:  3

This is due to several reasons:

 1) DT cpu nodes do not have an equivalent field to what the PPTT
    ACPI Processor ID must be, i.e. something equal to the MADT CPU
    UID or equal to the UID of an ACPI processor container. In both
    ACPI cases those are platform dependent IDs assigned by the
    vendor.

 2) While QEMU is the vendor for a guest, if the topology specifies
    SMT (> 1 thread), then, with ACPI, it is impossible to assign a
    core-id the same value as a package-id, thus it is not possible
    to have package-id=0 and core-id=0. This is because package and
    core containers must be in the same ACPI namespace and therefore
    must have unique UIDs.

 3) ACPI processor containers are not required for PPTT tables to
    be used and, due to the ID-selection limitations described above
    in (2), they are not helpful for QEMU, so we
    don't build them with this patch. In the absence of them, Linux
    assigns its own unique IDs. The maintainers have chosen not to use
    counters from zero, but rather ACPI table offsets, which explains
    why the numbers are so much larger than with DT.

 4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
    match the logical CPU IDs, because these IDs must be equal to the
    MADT CPU UID (as no processor containers are present), and QEMU
    uses the logical CPU ID for these MADT IDs.
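
(For reference, a hedged sketch, not part of the patch: listings like
the ones above can be reproduced inside a Linux guest by reading the
sysfs topology files, roughly as follows.)

    #include <stdio.h>

    int main(void)
    {
        for (int cpu = 0; ; cpu++) {
            char path[128];
            int pkg = -1, core = -1;
            FILE *f;

            snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d"
                     "/topology/physical_package_id", cpu);
            f = fopen(path, "r");
            if (!f) {
                break; /* no more cpus */
            }
            fscanf(f, "%d", &pkg);
            fclose(f);

            snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d"
                     "/topology/core_id", cpu);
            f = fopen(path, "r");
            if (f) {
                fscanf(f, "%d", &core);
                fclose(f);
            }
            printf("cpu: %2d package_id: %2d core_id: %2d\n",
                   cpu, pkg, core);
        }
        return 0;
    }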

Tested-by: Jiajie Li <lijiajie11@huawei.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
---
 hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

Comments

Andrew Jones April 27, 2021, 2:16 p.m. UTC | #1
On Tue, Apr 13, 2021 at 04:07:44PM +0800, Yanan Wang wrote:
> Add the Processor Properties Topology Table (PPTT) to present
> CPU topology information to ACPI guests. Note, while a DT boot
> Linux guest with a non-flat CPU topology will see socket and
> core IDs being sequential integers starting from zero, e.g.
> with -smp 4,sockets=2,cores=2,threads=1
> 
> a DT boot produces
> 
>  cpu:  0 package_id:  0 core_id:  0
>  cpu:  1 package_id:  0 core_id:  1
>  cpu:  2 package_id:  1 core_id:  0
>  cpu:  3 package_id:  1 core_id:  1
> 
> an ACPI boot produces
> 
>  cpu:  0 package_id: 36 core_id:  0
>  cpu:  1 package_id: 36 core_id:  1
>  cpu:  2 package_id: 96 core_id:  2
>  cpu:  3 package_id: 96 core_id:  3
> 
> This is due to several reasons:
> 
>  1) DT cpu nodes do not have an equivalent field to what the PPTT
>     ACPI Processor ID must be, i.e. something equal to the MADT CPU
>     UID or equal to the UID of an ACPI processor container. In both
>     ACPI cases those are platform dependent IDs assigned by the
>     vendor.
> 
>  2) While QEMU is the vendor for a guest, if the topology specifies
>     SMT (> 1 thread), then, with ACPI, it is impossible to assign a
>     core-id the same value as a package-id, thus it is not possible
>     to have package-id=0 and core-id=0. This is because package and
>     core containers must be in the same ACPI namespace and therefore
>     must have unique UIDs.
> 
>  3) ACPI processor containers are not required for PPTT tables to
>     be used and, due to the limitations of which IDs are selected
>     described above in (2), they are not helpful for QEMU, so we
>     don't build them with this patch. In the absence of them, Linux
>     assigns its own unique IDs. The maintainers have chosen not to use
>     counters from zero, but rather ACPI table offsets, which explains
>     why the numbers are so much larger than with DT.
> 
>  4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
>     match the logical CPU IDs, because these IDs must be equal to the
>     MADT CPU UID (as no processor containers are present), and QEMU
>     uses the logical CPU ID for these MADT IDs.
> 
> Tested-by: Jiajie Li <lijiajie11@huawei.com>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> Signed-off-by: Ying Fang <fangying1@huawei.com>
> Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
> ---
>  hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
> 
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 2ad5dad1bf..03fd812d5a 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -436,6 +436,64 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>                   vms->oem_table_id);
>  }
>  
> +/* PPTT */

Please point out the ACPI spec section "5.2.29 Processor Properties
Topology Table"

> +static void
> +build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)

QEMU doesn't use this style; please write it as

static void build_pptt(GArray *table_data, BIOSLinker *linker,
                       VirtMachineState *vms)

> +{
> +    int pptt_start = table_data->len;
> +    int uid = 0, cpus = 0, socket = 0;
> +    MachineState *ms = MACHINE(vms);
> +    unsigned int smp_cores = ms->smp.cores;
> +    unsigned int smp_threads = ms->smp.threads;
> +
> +    acpi_data_push(table_data, sizeof(AcpiTableHeader));
> +
> +    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {

Why not iterate from zero to ms->smp.sockets? With this type of loop if
the number of sockets doesn't correctly fit the number of possible cpus,
then you'll magically create new sockets that the user didn't want. That
case shouldn't be able to happen, though, because the smp parsing should
catch it. In any case, iterating sockets from zero up to their number would
make more sense.
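
A hedged sketch of the suggested restructuring, assuming smp parsing has
already validated that sockets * cores * threads covers all possible
cpus:

    for (socket = 0; socket < ms->smp.sockets; socket++) {
        uint32_t socket_offset = table_data->len - pptt_start;
        int core;

        build_processor_hierarchy_node(table_data,
            (1 << 0), /* ACPI 6.2: Physical package */
            0, socket, NULL, 0);

        for (core = 0; core < smp_cores; core++) {
            /* core and thread nodes as in the hunks below */
        }
    }

This also drops the running 'cpus' count (and the 'cpus += smp_cores *
smp_threads' at the bottom of the loop) entirely.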

> +        uint32_t socket_offset = table_data->len - pptt_start;
> +        int core;
> +
> +        build_processor_hierarchy_node(
> +            table_data, 1, /* Physical package */

If we want to pass the flags with in-argument-list comments, then please
make sure the flags are on separate lines. See below.

> +            0, socket, /* No parent */
> +            NULL, 0);  /* No private resources */

We don't need the 'No parent' and 'No private resources' comments.

        build_processor_hierarchy_node(table_data,
            (1 << 0), /* ACPI 6.2: Physical package */
            0, socket, NULL, 0);

> +
> +        for (core = 0; core < smp_cores; core++) {
> +            uint32_t core_offset = table_data->len - pptt_start;
> +            int thread;
> +
> +            if (smp_threads <= 1) {
> +                build_processor_hierarchy_node(
> +                    table_data,
> +                    (1 << 1) | /* ACPI Processor ID valid */
> +                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> +                    socket_offset, uid++, /* Parent is a Socket */
> +                    NULL, 0);  /* No private resources */

Now I see why you were calling out 6.3 in the previous patch. I suggest
keeping the function from the previous patch referencing 6.2, but
also keeping the 6.3 references here, as you already do:

                build_processor_hierarchy_node(table_data,
                    (1 << 1) | /* ACPI Processor ID valid */
                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
                    socket_offset, uid++, NULL, 0);

> +            } else {
> +                build_processor_hierarchy_node(
> +                    table_data, 0,
> +                    socket_offset, core, /* Parent is a Socket */
> +                    NULL, 0); /* No private resources */

No need for these in-argument-comments that don't match up with the spec.

> +
> +                for (thread = 0; thread < smp_threads; thread++) {
> +                    build_processor_hierarchy_node(
> +                        table_data,
> +                        (1 << 1) | /* ACPI Processor ID valid */
> +                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
> +                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */

This looks good.

> +                        core_offset, uid++, /* Parent is a Core */
> +                        NULL, 0);  /* No private resources */

Don't need these comments.

> +                }
> +            }
> +        }
> +        cpus += smp_cores * smp_threads;

As stated above, we don't want this.

> +    }
> +
> +    build_header(linker, table_data,
> +                 (void *)(table_data->data + pptt_start), "PPTT",
> +                 table_data->len - pptt_start, 2,
> +                 vms->oem_id, vms->oem_table_id);
> +}
> +
>  /* GTDT */
>  static void
>  build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
>      acpi_add_table(table_offsets, tables_blob);
>      build_madt(tables_blob, tables->linker, vms);
>  
> +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
> +        acpi_add_table(table_offsets, tables_blob);
> +        build_pptt(tables_blob, tables->linker, vms);
> +    }
> +
>      acpi_add_table(table_offsets, tables_blob);
>      build_gtdt(tables_blob, tables->linker, vms);
>  
> -- 
> 2.19.1
>

Besides some changes that I think should be reverted and the 6.3
flags, this patch looks very similar to [1], so I'd prefer my
authorship be maintained. However, if my authorship is dropped, then
my s-o-b should be replaced with a Co-developed-by.

[1] https://github.com/rhdrjones/qemu/commit/439b38d67ca1f2cbfa5b9892a822b651ebd05c11 

Thanks,
drew
wangyanan (Y) April 28, 2021, 7:30 a.m. UTC | #2
Hi Drew,

On 2021/4/27 22:16, Andrew Jones wrote:
> On Tue, Apr 13, 2021 at 04:07:44PM +0800, Yanan Wang wrote:
>> Add the Processor Properties Topology Table (PPTT) to present
>> CPU topology information to ACPI guests. Note, while a DT boot
>> Linux guest with a non-flat CPU topology will see socket and
>> core IDs being sequential integers starting from zero, e.g.
>> with -smp 4,sockets=2,cores=2,threads=1
>>
>> a DT boot produces
>>
>>   cpu:  0 package_id:  0 core_id:  0
>>   cpu:  1 package_id:  0 core_id:  1
>>   cpu:  2 package_id:  1 core_id:  0
>>   cpu:  3 package_id:  1 core_id:  1
>>
>> an ACPI boot produces
>>
>>   cpu:  0 package_id: 36 core_id:  0
>>   cpu:  1 package_id: 36 core_id:  1
>>   cpu:  2 package_id: 96 core_id:  2
>>   cpu:  3 package_id: 96 core_id:  3
>>
>> This is due to several reasons:
>>
>>   1) DT cpu nodes do not have an equivalent field to what the PPTT
>>      ACPI Processor ID must be, i.e. something equal to the MADT CPU
>>      UID or equal to the UID of an ACPI processor container. In both
>>      ACPI cases those are platform dependent IDs assigned by the
>>      vendor.
>>
>>   2) While QEMU is the vendor for a guest, if the topology specifies
>>      SMT (> 1 thread), then, with ACPI, it is impossible to assign a
>>      core-id the same value as a package-id, thus it is not possible
>>      to have package-id=0 and core-id=0. This is because package and
>>      core containers must be in the same ACPI namespace and therefore
>>      must have unique UIDs.
>>
>>   3) ACPI processor containers are not required for PPTT tables to
>>      be used and, due to the limitations of which IDs are selected
>>      described above in (2), they are not helpful for QEMU, so we
>>      don't build them with this patch. In the absence of them, Linux
>>      assigns its own unique IDs. The maintainers have chosen not to use
>>      counters from zero, but rather ACPI table offsets, which explains
>>      why the numbers are so much larger than with DT.
>>
>>   4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
>>      match the logical CPU IDs, because these IDs must be equal to the
>>      MADT CPU UID (as no processor containers are present), and QEMU
>>      uses the logical CPU ID for these MADT IDs.
>>
>> Tested-by: Jiajie Li <lijiajie11@huawei.com>
>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>> Signed-off-by: Ying Fang <fangying1@huawei.com>
>> Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
>> ---
>>   hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 63 insertions(+)
>>
>> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
>> index 2ad5dad1bf..03fd812d5a 100644
>> --- a/hw/arm/virt-acpi-build.c
>> +++ b/hw/arm/virt-acpi-build.c
>> @@ -436,6 +436,64 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>>                    vms->oem_table_id);
>>   }
>>   
>> +/* PPTT */
> Please point out the ACPI spec section "5.2.29 Processor Properties
> Topology Table"
Will fix.
>> +static void
>> +build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> QEMU doesn't do this style, please write as
>
> static void build_pptt(GArray *table_data, BIOSLinker *linker,
>                         VirtMachineState *vms)
Will fix.
>> +{
>> +    int pptt_start = table_data->len;
>> +    int uid = 0, cpus = 0, socket = 0;
>> +    MachineState *ms = MACHINE(vms);
>> +    unsigned int smp_cores = ms->smp.cores;
>> +    unsigned int smp_threads = ms->smp.threads;
>> +
>> +    acpi_data_push(table_data, sizeof(AcpiTableHeader));
>> +
>> +    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {
> Why not iterate from zero to ms->smp.sockets? With this type of loop if
> the number of sockets doesn't correctly fit the number of possible cpus,
> then you'll magically create new sockets that the user didn't want. That
> case shouldn't be able to happen, though, because the smp parsing should
> catch it. In any case, iterating sockets from zero up to their number would
> make more sense.
Either way, we will never hit "sockets * cores * threads !=
possible_cpus->len" here.
But yes, what you describe makes more sense and will make the code
easier to read.
>> +        uint32_t socket_offset = table_data->len - pptt_start;
>> +        int core;
>> +
>> +        build_processor_hierarchy_node(
>> +            table_data, 1, /* Physical package */
> If we want to pass the flags with in-argument-list comments, then please
> make sure the flags are on separate lines. See below.
>
>> +            0, socket, /* No parent */
>> +            NULL, 0);  /* No private resources */
> We don't need the 'No parent' and 'No private resources' comments.
>
>          build_processor_hierarchy_node(table_data,
>              (1 << 0), /* ACPI 6.2: Physical package */
>              0, socket, NULL, 0);
>
>> +
>> +        for (core = 0; core < smp_cores; core++) {
>> +            uint32_t core_offset = table_data->len - pptt_start;
>> +            int thread;
>> +
>> +            if (smp_threads <= 1) {
>> +                build_processor_hierarchy_node(
>> +                    table_data,
>> +                    (1 << 1) | /* ACPI Processor ID valid */
>> +                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
>> +                    socket_offset, uid++, /* Parent is a Socket */
>> +                    NULL, 0);  /* No private resources */
> Now I see why you were calling out 6.3 in the previous patch. I suggest
> still keeping the function of the previous patch referencing 6.2, but
> also keep referencing 6.3 here, like you already do
>
>                  build_processor_hierarchy_node(table_data,
>                      (1 << 1) | /* ACPI Processor ID valid */
>                      (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
>                      socket_offset, uid++, NULL, 0);
>
>> +            } else {
>> +                build_processor_hierarchy_node(
>> +                    table_data, 0,
>> +                    socket_offset, core, /* Parent is a Socket */
>> +                    NULL, 0); /* No private resources */
> No need for these in-argument-comments that don't match up with the spec.
>
>> +
>> +                for (thread = 0; thread < smp_threads; thread++) {
>> +                    build_processor_hierarchy_node(
>> +                        table_data,
>> +                        (1 << 1) | /* ACPI Processor ID valid */
>> +                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
>> +                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> This looks good.
>
>> +                        core_offset, uid++, /* Parent is a Core */
>> +                        NULL, 0);  /* No private resources */
> Don't need these comments.
Thanks for the above suggestions and guidance about in-argument comments.
I will make some adjustments.
>> +                }
>> +            }
>> +        }
>> +        cpus += smp_cores * smp_threads;
> As stated above, we don't want this.
>
>> +    }
>> +
>> +    build_header(linker, table_data,
>> +                 (void *)(table_data->data + pptt_start), "PPTT",
>> +                 table_data->len - pptt_start, 2,
>> +                 vms->oem_id, vms->oem_table_id);
>> +}
>> +
>>   /* GTDT */
>>   static void
>>   build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>> @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
>>       acpi_add_table(table_offsets, tables_blob);
>>       build_madt(tables_blob, tables->linker, vms);
>>   
>> +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
>> +        acpi_add_table(table_offsets, tables_blob);
>> +        build_pptt(tables_blob, tables->linker, vms);
>> +    }
>> +
>>       acpi_add_table(table_offsets, tables_blob);
>>       build_gtdt(tables_blob, tables->linker, vms);
>>   
>> -- 
>> 2.19.1
>>
> Besides some changes that I think should be changed back and the 6.3
> flags, this patch looks very similar to [1], so I'd prefer my
> authorship be maintained. However, if my authorship is dropped, then
> my s-o-b should be replaced with a Co-developed-by.
Of course, I will make it right.

Thanks,
Yanan
>
> [1] https://github.com/rhdrjones/qemu/commit/439b38d67ca1f2cbfa5b9892a822b651ebd05c11
>
> Thanks,
> drew
>
wangyanan (Y) May 13, 2021, 5:10 a.m. UTC | #3
Hi Drew,

I have a question below, and hope for your reply. Thanks!
On 2021/4/13 16:07, Yanan Wang wrote:
> Add the Processor Properties Topology Table (PPTT) to present
> CPU topology information to ACPI guests. Note, while a DT boot
> Linux guest with a non-flat CPU topology will see socket and
> core IDs being sequential integers starting from zero, e.g.
> with -smp 4,sockets=2,cores=2,threads=1
>
> a DT boot produces
>
>   cpu:  0 package_id:  0 core_id:  0
>   cpu:  1 package_id:  0 core_id:  1
>   cpu:  2 package_id:  1 core_id:  0
>   cpu:  3 package_id:  1 core_id:  1
>
> an ACPI boot produces
>
>   cpu:  0 package_id: 36 core_id:  0
>   cpu:  1 package_id: 36 core_id:  1
>   cpu:  2 package_id: 96 core_id:  2
>   cpu:  3 package_id: 96 core_id:  3
>
> This is due to several reasons:
>
>   1) DT cpu nodes do not have an equivalent field to what the PPTT
>      ACPI Processor ID must be, i.e. something equal to the MADT CPU
>      UID or equal to the UID of an ACPI processor container. In both
>      ACPI cases those are platform dependent IDs assigned by the
>      vendor.
>
>   2) While QEMU is the vendor for a guest, if the topology specifies
>      SMT (> 1 thread), then, with ACPI, it is impossible to assign a
>      core-id the same value as a package-id, thus it is not possible
>      to have package-id=0 and core-id=0. This is because package and
>      core containers must be in the same ACPI namespace and therefore
>      must have unique UIDs.
>
>   3) ACPI processor containers are not required for PPTT tables to
>      be used and, due to the limitations of which IDs are selected
>      described above in (2), they are not helpful for QEMU, so we
>      don't build them with this patch. In the absence of them, Linux
>      assigns its own unique IDs. The maintainers have chosen not to use
>      counters from zero, but rather ACPI table offsets, which explains
>      why the numbers are so much larger than with DT.
>
>   4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
>      match the logical CPU IDs, because these IDs must be equal to the
>      MADT CPU UID (as no processor containers are present), and QEMU
>      uses the logical CPU ID for these MADT IDs.
>
> Tested-by: Jiajie Li <lijiajie11@huawei.com>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> Signed-off-by: Ying Fang <fangying1@huawei.com>
> Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
> ---
>   hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 63 insertions(+)
>
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 2ad5dad1bf..03fd812d5a 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -436,6 +436,64 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>                    vms->oem_table_id);
>   }
>   
> +/* PPTT */
> +static void
> +build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> +{
> +    int pptt_start = table_data->len;
> +    int uid = 0, cpus = 0, socket = 0;
> +    MachineState *ms = MACHINE(vms);
> +    unsigned int smp_cores = ms->smp.cores;
> +    unsigned int smp_threads = ms->smp.threads;
> +
> +    acpi_data_push(table_data, sizeof(AcpiTableHeader));
> +
> +    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {
> +        uint32_t socket_offset = table_data->len - pptt_start;
> +        int core;
> +
> +        build_processor_hierarchy_node(
> +            table_data, 1, /* Physical package */
> +            0, socket, /* No parent */
> +            NULL, 0);  /* No private resources */
> +
> +        for (core = 0; core < smp_cores; core++) {
> +            uint32_t core_offset = table_data->len - pptt_start;
> +            int thread;
> +
> +            if (smp_threads <= 1) {
> +                build_processor_hierarchy_node(
> +                    table_data,
> +                    (1 << 1) | /* ACPI Processor ID valid */
> +                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> +                    socket_offset, uid++, /* Parent is a Socket */
> +                    NULL, 0);  /* No private resources */
> +            } else {
> +                build_processor_hierarchy_node(
> +                    table_data, 0,
> +                    socket_offset, core, /* Parent is a Socket */
> +                    NULL, 0); /* No private resources */
> +
> +                for (thread = 0; thread < smp_threads; thread++) {
> +                    build_processor_hierarchy_node(
> +                        table_data,
> +                        (1 << 1) | /* ACPI Processor ID valid */
> +                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
> +                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> +                        core_offset, uid++, /* Parent is a Core */
> +                        NULL, 0);  /* No private resources */
> +                }
> +            }
> +        }
> +        cpus += smp_cores * smp_threads;
> +    }
> +
> +    build_header(linker, table_data,
> +                 (void *)(table_data->data + pptt_start), "PPTT",
> +                 table_data->len - pptt_start, 2,
> +                 vms->oem_id, vms->oem_table_id);
> +}
> +
>   /* GTDT */
>   static void
>   build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
>       acpi_add_table(table_offsets, tables_blob);
>       build_madt(tables_blob, tables->linker, vms);
>   
> +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
I'm not really sure why we need to care about "ms->smp.cpus > 1" here.

IMO, just like the MADT, in which we create both ENABLED and DISABLED
gicc nodes regardless of whether the number of ENABLED nodes is one,
we should create the PPTT table for all the possible cpus and not care
about the number of smp cpus either. This will be more consistent with
the ACPI specification, and the PPTT table will be used for ACPI cpu
hotplug in the future even with "smp.cpus == 1". A sketch of what this
would look like at the call site follows below.

Caring about "smp.cpus > 1" in the DT cpu-map part makes sense to me,
because we are required to only add present cpu nodes to the DT, and
the Linux documentation says that a cpu-map is not needed for
uniprocessor systems.
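
A minimal sketch (hedged, not the final patch) of what always generating
the PPTT would look like at the call site, keeping only the machine
compat switch:

    /* Always build the PPTT for all possible cpus (sketch). */
    if (!vmc->no_cpu_topology) {
        acpi_add_table(table_offsets, tables_blob);
        build_pptt(tables_blob, tables->linker, vms);
    }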

Thanks,
Yanan
> +        acpi_add_table(table_offsets, tables_blob);
> +        build_pptt(tables_blob, tables->linker, vms);
> +    }
> +
>       acpi_add_table(table_offsets, tables_blob);
>       build_gtdt(tables_blob, tables->linker, vms);
>
Andrew Jones May 13, 2021, 6:55 a.m. UTC | #4
On Thu, May 13, 2021 at 01:10:10PM +0800, wangyanan (Y) wrote:
> >   /* GTDT */
> >   static void
> >   build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> > @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
> >       acpi_add_table(table_offsets, tables_blob);
> >       build_madt(tables_blob, tables->linker, vms);
> > +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
> I'm not really sure why we need to care about "ms->smp.cpus > 1" here?
> 
> IMO, just like MADT in which we create both ENABLED and DISABLED
> gicc nodes no matter of number of ENABLED nodes is one or not, we
> should create PPTT table for all the possible cpus and not care about
> number of smp cpus, too. This will be more consistent with the ACPI
> specification and the PPTT table will be used for ACPI cpu hotplug in
> the future even with  "smp.cpus == 1".
> 
> Care of "smp.cpus > 1" in the DT cpu-map part makes sense to me,
> because we are required to only add present cpu nodes to the DT and
> Linux Doc says that a cpu-map is not needed for uniprocessor systems.
> 

Hi Yanan,

You're right. Let's just always generate the PPTT.

Thanks,
drew
Salil Mehta May 18, 2021, 7:17 a.m. UTC | #5
> From: Qemu-arm [mailto:qemu-arm-bounces+salil.mehta=huawei.com@nongnu.org]
> On Behalf Of wangyanan (Y)
> Sent: Thursday, May 13, 2021 6:10 AM
> 
> Hi Drew,
> 
> I have a question below, and hope for your reply. Thanks!
> On 2021/4/13 16:07, Yanan Wang wrote:
> > Add the Processor Properties Topology Table (PPTT) to present
> > CPU topology information to ACPI guests. Note, while a DT boot
> > Linux guest with a non-flat CPU topology will see socket and
> > core IDs being sequential integers starting from zero, e.g.
> > with -smp 4,sockets=2,cores=2,threads=1
> >
> > a DT boot produces
> >
> >   cpu:  0 package_id:  0 core_id:  0
> >   cpu:  1 package_id:  0 core_id:  1
> >   cpu:  2 package_id:  1 core_id:  0
> >   cpu:  3 package_id:  1 core_id:  1
> >
> > an ACPI boot produces
> >
> >   cpu:  0 package_id: 36 core_id:  0
> >   cpu:  1 package_id: 36 core_id:  1
> >   cpu:  2 package_id: 96 core_id:  2
> >   cpu:  3 package_id: 96 core_id:  3
> >
> > This is due to several reasons:
> >
> >   1) DT cpu nodes do not have an equivalent field to what the PPTT
> >      ACPI Processor ID must be, i.e. something equal to the MADT CPU
> >      UID or equal to the UID of an ACPI processor container. In both
> >      ACPI cases those are platform dependent IDs assigned by the
> >      vendor.
> >
> >   2) While QEMU is the vendor for a guest, if the topology specifies
> >      SMT (> 1 thread), then, with ACPI, it is impossible to assign a
> >      core-id the same value as a package-id, thus it is not possible
> >      to have package-id=0 and core-id=0. This is because package and
> >      core containers must be in the same ACPI namespace and therefore
> >      must have unique UIDs.
> >
> >   3) ACPI processor containers are not required for PPTT tables to
> >      be used and, due to the limitations of which IDs are selected
> >      described above in (2), they are not helpful for QEMU, so we
> >      don't build them with this patch. In the absence of them, Linux
> >      assigns its own unique IDs. The maintainers have chosen not to use
> >      counters from zero, but rather ACPI table offsets, which explains
> >      why the numbers are so much larger than with DT.
> >
> >   4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
> >      match the logical CPU IDs, because these IDs must be equal to the
> >      MADT CPU UID (as no processor containers are present), and QEMU
> >      uses the logical CPU ID for these MADT IDs.
> >
> > Tested-by: Jiajie Li <lijiajie11@huawei.com>
> > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > Signed-off-by: Ying Fang <fangying1@huawei.com>
> > Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
> > ---
> >   hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 63 insertions(+)
> >
> > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> > index 2ad5dad1bf..03fd812d5a 100644
> > --- a/hw/arm/virt-acpi-build.c
> > +++ b/hw/arm/virt-acpi-build.c
> > @@ -436,6 +436,64 @@ build_srat(GArray *table_data, BIOSLinker *linker,
> VirtMachineState *vms)
> >                    vms->oem_table_id);
> >   }
> >
> > +/* PPTT */
> > +static void
> > +build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> > +{
> > +    int pptt_start = table_data->len;
> > +    int uid = 0, cpus = 0, socket = 0;
> > +    MachineState *ms = MACHINE(vms);
> > +    unsigned int smp_cores = ms->smp.cores;
> > +    unsigned int smp_threads = ms->smp.threads;
> > +
> > +    acpi_data_push(table_data, sizeof(AcpiTableHeader));
> > +
> > +    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {
> > +        uint32_t socket_offset = table_data->len - pptt_start;
> > +        int core;
> > +
> > +        build_processor_hierarchy_node(
> > +            table_data, 1, /* Physical package */
> > +            0, socket, /* No parent */
> > +            NULL, 0);  /* No private resources */
> > +
> > +        for (core = 0; core < smp_cores; core++) {
> > +            uint32_t core_offset = table_data->len - pptt_start;
> > +            int thread;
> > +
> > +            if (smp_threads <= 1) {
> > +                build_processor_hierarchy_node(
> > +                    table_data,
> > +                    (1 << 1) | /* ACPI Processor ID valid */
> > +                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> > +                    socket_offset, uid++, /* Parent is a Socket */
> > +                    NULL, 0);  /* No private resources */
> > +            } else {
> > +                build_processor_hierarchy_node(
> > +                    table_data, 0,
> > +                    socket_offset, core, /* Parent is a Socket */
> > +                    NULL, 0); /* No private resources */
> > +
> > +                for (thread = 0; thread < smp_threads; thread++) {
> > +                    build_processor_hierarchy_node(
> > +                        table_data,
> > +                        (1 << 1) | /* ACPI Processor ID valid */
> > +                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
> > +                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> > +                        core_offset, uid++, /* Parent is a Core */
> > +                        NULL, 0);  /* No private resources */
> > +                }
> > +            }
> > +        }
> > +        cpus += smp_cores * smp_threads;
> > +    }
> > +
> > +    build_header(linker, table_data,
> > +                 (void *)(table_data->data + pptt_start), "PPTT",
> > +                 table_data->len - pptt_start, 2,
> > +                 vms->oem_id, vms->oem_table_id);
> > +}
> > +
> >   /* GTDT */
> >   static void
> >   build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> > @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms,
> AcpiBuildTables *tables)
> >       acpi_add_table(table_offsets, tables_blob);
> >       build_madt(tables_blob, tables->linker, vms);
> >
> > +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
> I'm not really sure why we need to care about "ms->smp.cpus > 1" here?
> 
> IMO, just like MADT in which we create both ENABLED and DISABLED
> gicc nodes no matter of number of ENABLED nodes is one or not, we
> should create PPTT table for all the possible cpus and not care about
> number of smp cpus, too. This will be more consistent with the ACPI
> specification and the PPTT table will be used for ACPI cpu hotplug in
> the future even with  "smp.cpus == 1".


A humble request:
Let us not anticipate the vcpu hotplug changes here. Things are fluid
with respect to vcpu hotplug right now, and I think it would not be
right to base PPTT table changes on something whose final shape we
are not yet sure of.

Any such decisions should be postponed and made part of the actual
vcpu hotplug changes when (and if ever) they come for ARM64. This
will also ensure proper review of such changes, and that they are
useful in that particular context.


Thanks

> 
> Care of "smp.cpus > 1" in the DT cpu-map part makes sense to me,
> because we are required to only add present cpu nodes to the DT and
> Linux Doc says that a cpu-map is not needed for uniprocessor systems.
> 
> Thanks,
> Yanan
> > +        acpi_add_table(table_offsets, tables_blob);
> > +        build_pptt(tables_blob, tables->linker, vms);
> > +    }
> > +
> >       acpi_add_table(table_offsets, tables_blob);
> >       build_gtdt(tables_blob, tables->linker, vms);
> >
Andrew Jones May 18, 2021, 7:42 a.m. UTC | #6
On Tue, May 18, 2021 at 07:17:56AM +0000, Salil Mehta wrote:
> > From: Qemu-arm [mailto:qemu-arm-bounces+salil.mehta=huawei.com@nongnu.org]
> > On Behalf Of wangyanan (Y)
> > Sent: Thursday, May 13, 2021 6:10 AM
> > 
> > Hi Drew,
> > 
> > I have a question below, and hope for your reply. Thanks!
> > On 2021/4/13 16:07, Yanan Wang wrote:
> > > Add the Processor Properties Topology Table (PPTT) to present
> > > CPU topology information to ACPI guests. Note, while a DT boot
> > > Linux guest with a non-flat CPU topology will see socket and
> > > core IDs being sequential integers starting from zero, e.g.
> > > with -smp 4,sockets=2,cores=2,threads=1
> > >
> > > a DT boot produces
> > >
> > >   cpu:  0 package_id:  0 core_id:  0
> > >   cpu:  1 package_id:  0 core_id:  1
> > >   cpu:  2 package_id:  1 core_id:  0
> > >   cpu:  3 package_id:  1 core_id:  1
> > >
> > > an ACPI boot produces
> > >
> > >   cpu:  0 package_id: 36 core_id:  0
> > >   cpu:  1 package_id: 36 core_id:  1
> > >   cpu:  2 package_id: 96 core_id:  2
> > >   cpu:  3 package_id: 96 core_id:  3
> > >
> > > This is due to several reasons:
> > >
> > >   1) DT cpu nodes do not have an equivalent field to what the PPTT
> > >      ACPI Processor ID must be, i.e. something equal to the MADT CPU
> > >      UID or equal to the UID of an ACPI processor container. In both
> > >      ACPI cases those are platform dependent IDs assigned by the
> > >      vendor.
> > >
> > >   2) While QEMU is the vendor for a guest, if the topology specifies
> > >      SMT (> 1 thread), then, with ACPI, it is impossible to assign a
> > >      core-id the same value as a package-id, thus it is not possible
> > >      to have package-id=0 and core-id=0. This is because package and
> > >      core containers must be in the same ACPI namespace and therefore
> > >      must have unique UIDs.
> > >
> > >   3) ACPI processor containers are not required for PPTT tables to
> > >      be used and, due to the limitations of which IDs are selected
> > >      described above in (2), they are not helpful for QEMU, so we
> > >      don't build them with this patch. In the absence of them, Linux
> > >      assigns its own unique IDs. The maintainers have chosen not to use
> > >      counters from zero, but rather ACPI table offsets, which explains
> > >      why the numbers are so much larger than with DT.
> > >
> > >   4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
> > >      match the logical CPU IDs, because these IDs must be equal to the
> > >      MADT CPU UID (as no processor containers are present), and QEMU
> > >      uses the logical CPU ID for these MADT IDs.
> > >
> > > Tested-by: Jiajie Li <lijiajie11@huawei.com>
> > > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > > Signed-off-by: Ying Fang <fangying1@huawei.com>
> > > Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
> > > ---
> > >   hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
> > >   1 file changed, 63 insertions(+)
> > >
> > > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> > > index 2ad5dad1bf..03fd812d5a 100644
> > > --- a/hw/arm/virt-acpi-build.c
> > > +++ b/hw/arm/virt-acpi-build.c
> > > @@ -436,6 +436,64 @@ build_srat(GArray *table_data, BIOSLinker *linker,
> > VirtMachineState *vms)
> > >                    vms->oem_table_id);
> > >   }
> > >
> > > +/* PPTT */
> > > +static void
> > > +build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> > > +{
> > > +    int pptt_start = table_data->len;
> > > +    int uid = 0, cpus = 0, socket = 0;
> > > +    MachineState *ms = MACHINE(vms);
> > > +    unsigned int smp_cores = ms->smp.cores;
> > > +    unsigned int smp_threads = ms->smp.threads;
> > > +
> > > +    acpi_data_push(table_data, sizeof(AcpiTableHeader));
> > > +
> > > +    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {
> > > +        uint32_t socket_offset = table_data->len - pptt_start;
> > > +        int core;
> > > +
> > > +        build_processor_hierarchy_node(
> > > +            table_data, 1, /* Physical package */
> > > +            0, socket, /* No parent */
> > > +            NULL, 0);  /* No private resources */
> > > +
> > > +        for (core = 0; core < smp_cores; core++) {
> > > +            uint32_t core_offset = table_data->len - pptt_start;
> > > +            int thread;
> > > +
> > > +            if (smp_threads <= 1) {
> > > +                build_processor_hierarchy_node(
> > > +                    table_data,
> > > +                    (1 << 1) | /* ACPI Processor ID valid */
> > > +                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> > > +                    socket_offset, uid++, /* Parent is a Socket */
> > > +                    NULL, 0);  /* No private resources */
> > > +            } else {
> > > +                build_processor_hierarchy_node(
> > > +                    table_data, 0,
> > > +                    socket_offset, core, /* Parent is a Socket */
> > > +                    NULL, 0); /* No private resources */
> > > +
> > > +                for (thread = 0; thread < smp_threads; thread++) {
> > > +                    build_processor_hierarchy_node(
> > > +                        table_data,
> > > +                        (1 << 1) | /* ACPI Processor ID valid */
> > > +                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
> > > +                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> > > +                        core_offset, uid++, /* Parent is a Core */
> > > +                        NULL, 0);  /* No private resources */
> > > +                }
> > > +            }
> > > +        }
> > > +        cpus += smp_cores * smp_threads;
> > > +    }
> > > +
> > > +    build_header(linker, table_data,
> > > +                 (void *)(table_data->data + pptt_start), "PPTT",
> > > +                 table_data->len - pptt_start, 2,
> > > +                 vms->oem_id, vms->oem_table_id);
> > > +}
> > > +
> > >   /* GTDT */
> > >   static void
> > >   build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> > > @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms,
> > AcpiBuildTables *tables)
> > >       acpi_add_table(table_offsets, tables_blob);
> > >       build_madt(tables_blob, tables->linker, vms);
> > >
> > > +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
> > I'm not really sure why we need to care about "ms->smp.cpus > 1" here?
> > 
> > IMO, just like MADT in which we create both ENABLED and DISABLED
> > gicc nodes no matter of number of ENABLED nodes is one or not, we
> > should create PPTT table for all the possible cpus and not care about
> > number of smp cpus, too. This will be more consistent with the ACPI
> > specification and the PPTT table will be used for ACPI cpu hotplug in
> > the future even with  "smp.cpus == 1".
> 
> 
> A humble request:
> Let us not anticipate the changes of vcpu Hotplug here. Things are fluid
> with respect to the vcpu Hotplug right now and I think it will not be
> right to base PPTT Table changes in anticipation of something we are not
> sure of what it looks like.
> 
> Any such decisions should be postponed and be made part of the actual
> vcpu Hotplug changes when(and if ever) they come for ARM64. This will
> also ensure proper review of such changes and useful in that particular
> context.

Hi Salil,

Can you please elaborate on this and send some pointers to the hot plug
discussions? You're not saying that we shouldn't try to generate PPTT
tables for AArch64 guests until a solution for hot plug has been
determined, are you? If so, I don't think I would agree with that. There
are benefits to properly describing cpu topology to guests, even without
hot plug. Those benefits, when vcpu pinning is used, are the same benefits
as for the host, which already use PPTT tables to describe topology, even
though hot plug isn't supported.

Now, if you're saying we should only generate tables for smp.cpus, not
smp.maxcpus, because hot plug isn't supported anyway, then I see your
point. But, it'd be better to require smp.cpus == smp.maxcpus in our
smp_parse function to do that, which we've never done before, so we may
have trouble supporting existing command lines.
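
A hedged sketch of what such a requirement could look like; the actual
shape of smp_parse differs, so this is illustrative only:

    /* Hypothetical check: reject configurations that reserve
     * hot-pluggable cpus, since hot plug isn't supported anyway. */
    if (ms->smp.max_cpus != ms->smp.cpus) {
        error_report("cpu hot plug is not supported, "
                     "maxcpus must equal cpus");
        exit(1);
    }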

Thanks,
drew

> 
> 
> Thanks
> 
> > 
> > Care of "smp.cpus > 1" in the DT cpu-map part makes sense to me,
> > because we are required to only add present cpu nodes to the DT and
> > Linux Doc says that a cpu-map is not needed for uniprocessor systems.
> > 
> > Thanks,
> > Yanan
> > > +        acpi_add_table(table_offsets, tables_blob);
> > > +        build_pptt(tables_blob, tables->linker, vms);
> > > +    }
> > > +
> > >       acpi_add_table(table_offsets, tables_blob);
> > >       build_gtdt(tables_blob, tables->linker, vms);
> > >
>
wangyanan (Y) May 18, 2021, 9:16 a.m. UTC | #7
On 2021/5/18 15:17, Salil Mehta wrote:
>> From: Qemu-arm [mailto:qemu-arm-bounces+salil.mehta=huawei.com@nongnu.org]
>> On Behalf Of wangyanan (Y)
>> Sent: Thursday, May 13, 2021 6:10 AM
>>
>> Hi Drew,
>>
>> I have a question below, and hope for your reply. Thanks!
>> On 2021/4/13 16:07, Yanan Wang wrote:
>>> Add the Processor Properties Topology Table (PPTT) to present
>>> CPU topology information to ACPI guests. Note, while a DT boot
>>> Linux guest with a non-flat CPU topology will see socket and
>>> core IDs being sequential integers starting from zero, e.g.
>>> with -smp 4,sockets=2,cores=2,threads=1
>>>
>>> a DT boot produces
>>>
>>>    cpu:  0 package_id:  0 core_id:  0
>>>    cpu:  1 package_id:  0 core_id:  1
>>>    cpu:  2 package_id:  1 core_id:  0
>>>    cpu:  3 package_id:  1 core_id:  1
>>>
>>> an ACPI boot produces
>>>
>>>    cpu:  0 package_id: 36 core_id:  0
>>>    cpu:  1 package_id: 36 core_id:  1
>>>    cpu:  2 package_id: 96 core_id:  2
>>>    cpu:  3 package_id: 96 core_id:  3
>>>
>>> This is due to several reasons:
>>>
>>>    1) DT cpu nodes do not have an equivalent field to what the PPTT
>>>       ACPI Processor ID must be, i.e. something equal to the MADT CPU
>>>       UID or equal to the UID of an ACPI processor container. In both
>>>       ACPI cases those are platform dependent IDs assigned by the
>>>       vendor.
>>>
>>>    2) While QEMU is the vendor for a guest, if the topology specifies
>>>       SMT (> 1 thread), then, with ACPI, it is impossible to assign a
>>>       core-id the same value as a package-id, thus it is not possible
>>>       to have package-id=0 and core-id=0. This is because package and
>>>       core containers must be in the same ACPI namespace and therefore
>>>       must have unique UIDs.
>>>
>>>    3) ACPI processor containers are not required for PPTT tables to
>>>       be used and, due to the limitations of which IDs are selected
>>>       described above in (2), they are not helpful for QEMU, so we
>>>       don't build them with this patch. In the absence of them, Linux
>>>       assigns its own unique IDs. The maintainers have chosen not to use
>>>       counters from zero, but rather ACPI table offsets, which explains
>>>       why the numbers are so much larger than with DT.
>>>
>>>    4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
>>>       match the logical CPU IDs, because these IDs must be equal to the
>>>       MADT CPU UID (as no processor containers are present), and QEMU
>>>       uses the logical CPU ID for these MADT IDs.
>>>
>>> Tested-by: Jiajie Li <lijiajie11@huawei.com>
>>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>>> Signed-off-by: Ying Fang <fangying1@huawei.com>
>>> Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
>>> ---
>>>    hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 63 insertions(+)
>>>
>>> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
>>> index 2ad5dad1bf..03fd812d5a 100644
>>> --- a/hw/arm/virt-acpi-build.c
>>> +++ b/hw/arm/virt-acpi-build.c
>>> @@ -436,6 +436,64 @@ build_srat(GArray *table_data, BIOSLinker *linker,
>> VirtMachineState *vms)
>>>                     vms->oem_table_id);
>>>    }
>>>
>>> +/* PPTT */
>>> +static void
>>> +build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>>> +{
>>> +    int pptt_start = table_data->len;
>>> +    int uid = 0, cpus = 0, socket = 0;
>>> +    MachineState *ms = MACHINE(vms);
>>> +    unsigned int smp_cores = ms->smp.cores;
>>> +    unsigned int smp_threads = ms->smp.threads;
>>> +
>>> +    acpi_data_push(table_data, sizeof(AcpiTableHeader));
>>> +
>>> +    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {
>>> +        uint32_t socket_offset = table_data->len - pptt_start;
>>> +        int core;
>>> +
>>> +        build_processor_hierarchy_node(
>>> +            table_data, 1, /* Physical package */
>>> +            0, socket, /* No parent */
>>> +            NULL, 0);  /* No private resources */
>>> +
>>> +        for (core = 0; core < smp_cores; core++) {
>>> +            uint32_t core_offset = table_data->len - pptt_start;
>>> +            int thread;
>>> +
>>> +            if (smp_threads <= 1) {
>>> +                build_processor_hierarchy_node(
>>> +                    table_data,
>>> +                    (1 << 1) | /* ACPI Processor ID valid */
>>> +                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
>>> +                    socket_offset, uid++, /* Parent is a Socket */
>>> +                    NULL, 0);  /* No private resources */
>>> +            } else {
>>> +                build_processor_hierarchy_node(
>>> +                    table_data, 0,
>>> +                    socket_offset, core, /* Parent is a Socket */
>>> +                    NULL, 0); /* No private resources */
>>> +
>>> +                for (thread = 0; thread < smp_threads; thread++) {
>>> +                    build_processor_hierarchy_node(
>>> +                        table_data,
>>> +                        (1 << 1) | /* ACPI Processor ID valid */
>>> +                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
>>> +                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
>>> +                        core_offset, uid++, /* Parent is a Core */
>>> +                        NULL, 0);  /* No private resources */
>>> +                }
>>> +            }
>>> +        }
>>> +        cpus += smp_cores * smp_threads;
>>> +    }
>>> +
>>> +    build_header(linker, table_data,
>>> +                 (void *)(table_data->data + pptt_start), "PPTT",
>>> +                 table_data->len - pptt_start, 2,
>>> +                 vms->oem_id, vms->oem_table_id);
>>> +}
>>> +
>>>    /* GTDT */
>>>    static void
>>>    build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>>> @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms,
>> AcpiBuildTables *tables)
>>>        acpi_add_table(table_offsets, tables_blob);
>>>        build_madt(tables_blob, tables->linker, vms);
>>>
>>> +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
>> I'm not really sure why we need to care about "ms->smp.cpus > 1" here?
>>
>> IMO, just like MADT in which we create both ENABLED and DISABLED
>> gicc nodes no matter of number of ENABLED nodes is one or not, we
>> should create PPTT table for all the possible cpus and not care about
>> number of smp cpus, too. This will be more consistent with the ACPI
>> specification and the PPTT table will be used for ACPI cpu hotplug in
>> the future even with  "smp.cpus == 1".
>
> A humble request:
> Let us not anticipate the changes of vcpu Hotplug here. Things are fluid
> with respect to the vcpu Hotplug right now and I think it will not be
> right to base PPTT Table changes in anticipation of something we are not
> sure of what it looks like.
Hi Salil,

I agree that I shouldn't anticipate vcpu hotplug, which has little
connection with this series, so it's not appropriate to consider too
much of it when generating the PPTT. I'm guessing this is what you mean.

That said, PPTT generation is needed to expose the cpu topology to the
guest, and the ACPI spec also indicates that we should provide the
hierarchy information for all cpus. See [1] (note the info at page 260).
Can we agree on this?

[1] https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf

Thanks,
Yanan
> Any such decisions should be postponed and be made part of the actual
> vcpu Hotplug changes when(and if ever) they come for ARM64. This will
> also ensure proper review of such changes and useful in that particular
> context.
>
>
> Thanks
>
>> Care of "smp.cpus > 1" in the DT cpu-map part makes sense to me,
>> because we are required to only add present cpu nodes to the DT and
>> Linux Doc says that a cpu-map is not needed for uniprocessor systems.
>>
>> Thanks,
>> Yanan
>>> +        acpi_add_table(table_offsets, tables_blob);
>>> +        build_pptt(tables_blob, tables->linker, vms);
>>> +    }
>>> +
>>>        acpi_add_table(table_offsets, tables_blob);
>>>        build_gtdt(tables_blob, tables->linker, vms);
>>>
Salil Mehta May 18, 2021, 6:34 p.m. UTC | #8
> From: Andrew Jones [mailto:drjones@redhat.com]
> Sent: Tuesday, May 18, 2021 8:42 AM
> To: Salil Mehta <salil.mehta@huawei.com>
> Cc: wangyanan (Y) <wangyanan55@huawei.com>; Peter Maydell
> <peter.maydell@linaro.org>; Michael S . Tsirkin <mst@redhat.com>; Wanghaibin
> (D) <wanghaibin.wang@huawei.com>; qemu-devel@nongnu.org; Shannon Zhao
> <shannon.zhaosl@gmail.com>; qemu-arm@nongnu.org; Alistair Francis
> <alistair.francis@wdc.com>; Zengtao (B) <prime.zeng@hisilicon.com>;
> yangyicong <yangyicong@huawei.com>; yuzenghui <yuzenghui@huawei.com>; Igor
> Mammedov <imammedo@redhat.com>; zhukeqian <zhukeqian1@huawei.com>; lijiajie (H)
> <lijiajie11@huawei.com>; David Gibson <david@gibson.dropbear.id.au>; Linuxarm
> <linuxarm@huawei.com>; linuxarm@openeuler.org
> Subject: Re: [RFC PATCH v2 5/6] hw/arm/virt-acpi-build: Add PPTT table
> 
> On Tue, May 18, 2021 at 07:17:56AM +0000, Salil Mehta wrote:
> > > From: Qemu-arm
> [mailto:qemu-arm-bounces+salil.mehta=huawei.com@nongnu.org]
> > > On Behalf Of wangyanan (Y)
> > > Sent: Thursday, May 13, 2021 6:10 AM
> > >
> > > Hi Drew,
> > >
> > > I have a question below, and hope for your reply. Thanks!
> > > On 2021/4/13 16:07, Yanan Wang wrote:
> > > > Add the Processor Properties Topology Table (PPTT) to present
> > > > CPU topology information to ACPI guests. Note, while a DT boot
> > > > Linux guest with a non-flat CPU topology will see socket and
> > > > core IDs being sequential integers starting from zero, e.g.
> > > > with -smp 4,sockets=2,cores=2,threads=1
> > > >
> > > > a DT boot produces
> > > >
> > > >   cpu:  0 package_id:  0 core_id:  0
> > > >   cpu:  1 package_id:  0 core_id:  1
> > > >   cpu:  2 package_id:  1 core_id:  0
> > > >   cpu:  3 package_id:  1 core_id:  1
> > > >
> > > > an ACPI boot produces
> > > >
> > > >   cpu:  0 package_id: 36 core_id:  0
> > > >   cpu:  1 package_id: 36 core_id:  1
> > > >   cpu:  2 package_id: 96 core_id:  2
> > > >   cpu:  3 package_id: 96 core_id:  3
> > > >
> > > > This is due to several reasons:
> > > >
> > > >   1) DT cpu nodes do not have an equivalent field to what the PPTT
> > > >      ACPI Processor ID must be, i.e. something equal to the MADT CPU
> > > >      UID or equal to the UID of an ACPI processor container. In both
> > > >      ACPI cases those are platform dependent IDs assigned by the
> > > >      vendor.
> > > >
> > > >   2) While QEMU is the vendor for a guest, if the topology specifies
> > > >      SMT (> 1 thread), then, with ACPI, it is impossible to assign a
> > > >      core-id the same value as a package-id, thus it is not possible
> > > >      to have package-id=0 and core-id=0. This is because package and
> > > >      core containers must be in the same ACPI namespace and therefore
> > > >      must have unique UIDs.
> > > >
> > > >   3) ACPI processor containers are not required for PPTT tables to
> > > >      be used and, due to the limitations of which IDs are selected
> > > >      described above in (2), they are not helpful for QEMU, so we
> > > >      don't build them with this patch. In the absence of them, Linux
> > > >      assigns its own unique IDs. The maintainers have chosen not to use
> > > >      counters from zero, but rather ACPI table offsets, which explains
> > > >      why the numbers are so much larger than with DT.
> > > >
> > > >   4) When there is no SMT (threads=1) the core IDs for ACPI boot guests
> > > >      match the logical CPU IDs, because these IDs must be equal to the
> > > >      MADT CPU UID (as no processor containers are present), and QEMU
> > > >      uses the logical CPU ID for these MADT IDs.
> > > >
> > > > Tested-by: Jiajie Li <lijiajie11@huawei.com>
> > > > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > > > Signed-off-by: Ying Fang <fangying1@huawei.com>
> > > > Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
> > > > ---
> > > >   hw/arm/virt-acpi-build.c | 63 ++++++++++++++++++++++++++++++++++++++++
> > > >   1 file changed, 63 insertions(+)
> > > >
> > > > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> > > > index 2ad5dad1bf..03fd812d5a 100644
> > > > --- a/hw/arm/virt-acpi-build.c
> > > > +++ b/hw/arm/virt-acpi-build.c
> > > > @@ -436,6 +436,64 @@ build_srat(GArray *table_data, BIOSLinker *linker,
> > > VirtMachineState *vms)
> > > >                    vms->oem_table_id);
> > > >   }
> > > >
> > > > +/* PPTT */
> > > > +static void
> > > > +build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> > > > +{
> > > > +    int pptt_start = table_data->len;
> > > > +    int uid = 0, cpus = 0, socket = 0;
> > > > +    MachineState *ms = MACHINE(vms);
> > > > +    unsigned int smp_cores = ms->smp.cores;
> > > > +    unsigned int smp_threads = ms->smp.threads;
> > > > +
> > > > +    acpi_data_push(table_data, sizeof(AcpiTableHeader));
> > > > +
> > > > +    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {
> > > > +        uint32_t socket_offset = table_data->len - pptt_start;
> > > > +        int core;
> > > > +
> > > > +        build_processor_hierarchy_node(
> > > > +            table_data, 1, /* Physical package */
> > > > +            0, socket, /* No parent */
> > > > +            NULL, 0);  /* No private resources */
> > > > +
> > > > +        for (core = 0; core < smp_cores; core++) {
> > > > +            uint32_t core_offset = table_data->len - pptt_start;
> > > > +            int thread;
> > > > +
> > > > +            if (smp_threads <= 1) {
> > > > +                build_processor_hierarchy_node(
> > > > +                    table_data,
> > > > +                    (1 << 1) | /* ACPI Processor ID valid */
> > > > +                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> > > > +                    socket_offset, uid++, /* Parent is a Socket */
> > > > +                    NULL, 0);  /* No private resources */
> > > > +            } else {
> > > > +                build_processor_hierarchy_node(
> > > > +                    table_data, 0,
> > > > +                    socket_offset, core, /* Parent is a Socket */
> > > > +                    NULL, 0); /* No private resources */
> > > > +
> > > > +                for (thread = 0; thread < smp_threads; thread++) {
> > > > +                    build_processor_hierarchy_node(
> > > > +                        table_data,
> > > > +                        (1 << 1) | /* ACPI Processor ID valid */
> > > > +                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
> > > > +                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
> > > > +                        core_offset, uid++, /* Parent is a Core */
> > > > +                        NULL, 0);  /* No private resources */
> > > > +                }
> > > > +            }
> > > > +        }
> > > > +        cpus += smp_cores * smp_threads;
> > > > +    }
> > > > +
> > > > +    build_header(linker, table_data,
> > > > +                 (void *)(table_data->data + pptt_start), "PPTT",
> > > > +                 table_data->len - pptt_start, 2,
> > > > +                 vms->oem_id, vms->oem_table_id);
> > > > +}
> > > > +
> > > >   /* GTDT */
> > > >   static void
> > > >   build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState
> *vms)
> > > > @@ -707,6 +765,11 @@ void virt_acpi_build(VirtMachineState *vms,
> > > AcpiBuildTables *tables)
> > > >       acpi_add_table(table_offsets, tables_blob);
> > > >       build_madt(tables_blob, tables->linker, vms);
> > > >
> > > > +    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
> > > I'm not really sure why we need to care about "ms->smp.cpus > 1" here?
> > >
> > > IMO, just like the MADT, where we create both ENABLED and DISABLED
> > > gicc nodes regardless of whether the number of ENABLED nodes is one,
> > > we should create the PPTT table for all the possible cpus and not
> > > care about the number of smp cpus either. This would be more
> > > consistent with the ACPI specification, and the PPTT table will be
> > > used for ACPI cpu hotplug in the future even with "smp.cpus == 1".
> >
> >
> > A humble request:
> > Let us not anticipate the vcpu Hotplug changes here. Things are fluid
> > with respect to vcpu Hotplug right now, and I think it would not be
> > right to base the PPTT table changes on something whose final shape
> > we are not yet sure of.
> >
> > Any such decisions should be postponed and made part of the actual
> > vcpu Hotplug changes when (and if ever) they come for ARM64. This
> > will also ensure proper review of such changes, which will be more
> > useful in that particular context.
> 
> Hi Salil,
> 
> Can you please elaborate on this and send some pointers to the hot plug
> discussions? 

Hi Andrew,
As you are aware, ACPI based vcpu Hotplug is under contention right now.
It is being discussed within ARM to have a Hotplug mechanism which does
not involve QEMU<->Guest ACPI Hotplug exchanges and is purely based on
PSCI triggers (which might take a different ACPI path). If you wish, you
can join the Linaro Open Discussion meeting for the same; all these
discussions have been happening there:

https://linaro.atlassian.net/wiki/spaces/LOD/pages/26844463630/2021-5-25+Meeting+Meeting+notes


> You're not saying that we shouldn't try to generate PPTT
> tables for AArch64 guests until a solution for hot plug has been
> determined, are you? 

Sorry, I did not mean that. The PPTT changes are independent of vcpu
Hotplug support and are still required without it. No problem with that.


> If so, I don't think I would agree with that. There
> are benefits to properly describing cpu topology to guests, even without
> hot plug.

Agreed. No second thoughts on that.

> Those benefits, when vcpu pinning is used, are the same benefits
> as for the host, which already use PPTT tables to describe topology, even
> though hot plug isn't supported.

yes sure, you mean pinning vcpus according to the cpu topology for performance?

> 
> Now, if you're saying we should only generate tables for smp.cpus, not

Correct. This is what I thought we must be doing even now

> smp.maxcpus, because hot plug isn't supported anyway, then I see your
> point. But, it'd be better to require smp.cpus == smp.maxcpus in our
> smp_parse function to do that, which we've never done before, so we may
> have trouble supporting existing command lines.

I am trying to recall, if the vcpu Hotplug is not supported then can they
ever be different?

cpus =  (threads * cores * sockets)

static void smp_parse(MachineState *ms, QemuOpts *opts)
{
     [...]

        if (sockets * cores * threads != ms->smp.max_cpus) {
            warn_report("Invalid CPU topology deprecated: "
                        "sockets (%u) * cores (%u) * threads (%u) "
                        "!= maxcpus (%u)",
                        sockets, cores, threads,
                        ms->smp.max_cpus);
        }
     [...]
}
  
Although, above check does not exit(1) and just warns on detecting invalid
CPU topology. Not sure why?

Well if you think there are subtleties to support above implementation and
we cannot do it now then sure it is your call. :)

I just thought to slim the patch-set down and club the relevant logic
into the places where it would ideally make more sense to review.


Thanks
Salil.
Andrew Jones May 18, 2021, 7:05 p.m. UTC | #9
On Tue, May 18, 2021 at 06:34:08PM +0000, Salil Mehta wrote:
> > Those benefits, when vcpu pinning is used, are the same benefits
> > as for the host, which already use PPTT tables to describe topology, even
> > though hot plug isn't supported.
> 
> yes sure, you mean pinning vcpus according to the cpu topology for performance?

Yup

> 
> > 
> > Now, if you're saying we should only generate tables for smp.cpus, not
> 
> Correct. This is what I thought we must be doing even now
> 
> > smp.maxcpus, because hot plug isn't supported anyway, then I see your
> > point. But, it'd be better to require smp.cpus == smp.maxcpus in our
> > smp_parse function to do that, which we've never done before, so we may
> > have trouble supporting existing command lines.
> 
> I am trying to recall, if the vcpu Hotplug is not supported then can they
> ever be different?
> 
> cpus =  (threads * cores * sockets)
> 
> static void smp_parse(MachineState *ms, QemuOpts *opts)
> {
>      [...]
> 
>         if (sockets * cores * threads != ms->smp.max_cpus) {
>             warn_report("Invalid CPU topology deprecated: "
>                         "sockets (%u) * cores (%u) * threads (%u) "
>                         "!= maxcpus (%u)",
>                         sockets, cores, threads,
>                         ms->smp.max_cpus);
>         }
>      [...]
> }
>   
> Although, above check does not exit(1) and just warns on detecting invalid
> CPU topology. Not sure why?

Hmm, not sure what code you have there. I see this in
hw/core/machine.c:smp_parse

        if (ms->smp.max_cpus < cpus) {
            error_report("maxcpus must be equal to or greater than smp");
            exit(1);
        }

        if (sockets * cores * threads != ms->smp.max_cpus) {
            error_report("Invalid CPU topology: "
                         "sockets (%u) * cores (%u) * threads (%u) "
                         "!= maxcpus (%u)",
                         sockets, cores, threads,
                         ms->smp.max_cpus);
            exit(1);
        }

> 
> Well if you think there are subtleties to support above implementation and
> we cannot do it now then sure it is your call. :)

The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
we're proposing that it should. Maybe we can start erroring out when
cpus != maxcpus until hot plug is supported?

Thanks,
drew
Salil Mehta May 18, 2021, 7:22 p.m. UTC | #10
> From: Andrew Jones [mailto:drjones@redhat.com]
> Sent: Tuesday, May 18, 2021 8:06 PM
> Subject: Re: [RFC PATCH v2 5/6] hw/arm/virt-acpi-build: Add PPTT table
> 
> On Tue, May 18, 2021 at 06:34:08PM +0000, Salil Mehta wrote:
> > > Those benefits, when vcpu pinning is used, are the same benefits
> > > as for the host, which already use PPTT tables to describe topology, even
> > > though hot plug isn't supported.
> >
> > yes sure, you mean pinning vcpus according to the cpu topology for performance?
> 
> Yup

Already Agreed :)

> > > Now, if you're saying we should only generate tables for smp.cpus, not
> >
> > Correct. This is what I thought we must be doing even now
> >
> > > smp.maxcpus, because hot plug isn't supported anyway, then I see your
> > > point. But, it'd be better to require smp.cpus == smp.maxcpus in our
> > > smp_parse function to do that, which we've never done before, so we may
> > > have trouble supporting existing command lines.
> >
> > I am trying to recall, if the vcpu Hotplug is not supported then can they
> > ever be different?
> >
> > cpus =  (threads * cores * sockets)
> >
> > static void smp_parse(MachineState *ms, QemuOpts *opts)
> > {
> >      [...]
> >
> >         if (sockets * cores * threads != ms->smp.max_cpus) {
> >             warn_report("Invalid CPU topology deprecated: "
> >                         "sockets (%u) * cores (%u) * threads (%u) "
> >                         "!= maxcpus (%u)",
> >                         sockets, cores, threads,
> >                         ms->smp.max_cpus);
> >         }
> >      [...]
> > }
> >
> > Although, above check does not exit(1) and just warns on detecting invalid
> > CPU topology. Not sure why?
> 
> Hmm, not sure what code you have there. I see this in
> hw/core/machine.c:smp_parse
> 
>         if (ms->smp.max_cpus < cpus) {
>             error_report("maxcpus must be equal to or greater than smp");
>             exit(1);
>         }
> 
>         if (sockets * cores * threads != ms->smp.max_cpus) {
>             error_report("Invalid CPU topology: "
>                          "sockets (%u) * cores (%u) * threads (%u) "
>                          "!= maxcpus (%u)",
>                          sockets, cores, threads,
>                          ms->smp.max_cpus);
>             exit(1);
>         }
> 
> >
> > Well if you think there are subtleties to support above implementation and
> > we cannot do it now then sure it is your call. :)
> 
> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
> we're proposing that it should. Maybe we can start erroring out when
> cpus != maxcpus until hot plug is supported?

Agreed, both don't make any sense if hotplug is not supported and ideally should
fail with error. We should block any such topology configuration.


Thanks
Salil
wangyanan (Y) May 19, 2021, 3:18 a.m. UTC | #11
On 2021/5/19 3:22, Salil Mehta wrote:
>> From: Andrew Jones [mailto:drjones@redhat.com]
>> Sent: Tuesday, May 18, 2021 8:06 PM
>> Subject: Re: [RFC PATCH v2 5/6] hw/arm/virt-acpi-build: Add PPTT table
>>
>> On Tue, May 18, 2021 at 06:34:08PM +0000, Salil Mehta wrote:
>>>> Those benefits, when vcpu pinning is used, are the same benefits
>>>> as for the host, which already use PPTT tables to describe topology, even
>>>> though hot plug isn't supported.
>>> yes sure, you mean pinning vcpus according to the cpu topology for performance?
>> Yup
> Already Agreed :)
>
>>>> Now, if you're saying we should only generate tables for smp.cpus, not
>>> Correct. This is what I thought we must be doing even now
>>>
>>>> smp.maxcpus, because hot plug isn't supported anyway, then I see your
>>>> point. But, it'd be better to require smp.cpus == smp.maxcpus in our
>>>> smp_parse function to do that, which we've never done before, so we may
>>>> have trouble supporting existing command lines.
>>> I am trying to recall, if the vcpu Hotplug is not supported then can they
>>> ever be different?
>>>
>>> cpus =  (threads * cores * sockets)
>>>
>>> static void smp_parse(MachineState *ms, QemuOpts *opts)
>>> {
>>>       [...]
>>>
>>>          if (sockets * cores * threads != ms->smp.max_cpus) {
>>>              warn_report("Invalid CPU topology deprecated: "
>>>                          "sockets (%u) * cores (%u) * threads (%u) "
>>>                          "!= maxcpus (%u)",
>>>                          sockets, cores, threads,
>>>                          ms->smp.max_cpus);
>>>          }
>>>       [...]
>>> }
>>>
>>> Although, above check does not exit(1) and just warns on detecting invalid
>>> CPU topology. Not sure why?
>> Hmm, not sure what code you have there. I see this in
>> hw/core/machine.c:smp_parse
>>
>>          if (ms->smp.max_cpus < cpus) {
>>              error_report("maxcpus must be equal to or greater than smp");
>>              exit(1);
>>          }
>>
>>          if (sockets * cores * threads != ms->smp.max_cpus) {
>>              error_report("Invalid CPU topology: "
>>                           "sockets (%u) * cores (%u) * threads (%u) "
>>                           "!= maxcpus (%u)",
>>                           sockets, cores, threads,
>>                           ms->smp.max_cpus);
>>              exit(1);
>>          }
>>
>>> Well if you think there are subtleties to support above implementation and
>>> we cannot do it now then sure it is your call. :)
Hi Salil, Drew,
>> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
>> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
>> we're proposing that it should. Maybe we can start erroring out when
>> cpus != maxcpus until hot plug is supported?
> Agreed, both don't make any sense if hotplug is not supported and ideally should
> fail with error. We should block any such topology configuration.
In the ARM-specific function virt_smp_parse() (patch 9), there already
have been some restrictions for the given -smp configuration.
We now only allow:
-smp N
-smp maxcpus=M
-smp N, maxcpus=M

-smp N, sockets=X, cores=Y
-smp N, sockets=X, cores=Y, threads=Z

-smp maxcpus=M, sockets=X, cores=Y
-smp maxcpus=M, sockets=X, cores=Y, threads=Z

-smp N, maxcpus=M, sockets=X, cores=Y
-smp N, maxcpus=M, sockets=X, cores=Y, threads=Z

and disallow the other strange and rare formats that shouldn't be provided.

It's reasonable to block the topology configuration which is not useful
currently. I will add the requirement for "cpus==maxcpus" in this function
if the possible conflict with existing command lines is not a big problem.
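
A rough sketch of the kind of check behind that allowed list, assuming
it sits in virt_smp_parse() (the has_* locals are illustrative;
qemu_opt_get() is the existing QemuOpts accessor):

    bool has_sockets = qemu_opt_get(opts, "sockets") != NULL;
    bool has_cores   = qemu_opt_get(opts, "cores") != NULL;
    bool has_threads = qemu_opt_get(opts, "threads") != NULL;

    /* Hypothetical: reject partial topology descriptions; sockets and
     * cores must come together, and threads alone is not enough. */
    if ((has_sockets != has_cores) || (has_threads && !has_cores)) {
        error_report("Invalid CPU topology: sockets and cores must "
                     "be provided together");
        exit(1);
    }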

Thanks,
Yanan
>
> Thanks
> Salil
> .
Salil Mehta May 19, 2021, 7:54 a.m. UTC | #12
> From: wangyanan (Y)
> Sent: Wednesday, May 19, 2021 4:18 AM
> 
> 
> On 2021/5/19 3:22, Salil Mehta wrote:
> >> From: Andrew Jones [mailto:drjones@redhat.com]
> >> Sent: Tuesday, May 18, 2021 8:06 PM
> >> Subject: Re: [RFC PATCH v2 5/6] hw/arm/virt-acpi-build: Add PPTT table
> >>
> >> On Tue, May 18, 2021 at 06:34:08PM +0000, Salil Mehta wrote:
> >>>> Those benefits, when vcpu pinning is used, are the same benefits
> >>>> as for the host, which already use PPTT tables to describe topology, even
> >>>> though hot plug isn't supported.
> >>> yes sure, you mean pinning vcpus according to the cpu topology for performance?
> >> Yup
> > Already Agreed :)
> >
> >>>> Now, if you're saying we should only generate tables for smp.cpus, not
> >>> Correct. This is what I thought we must be doing even now
> >>>
> >>>> smp.maxcpus, because hot plug isn't supported anyway, then I see your
> >>>> point. But, it'd be better to require smp.cpus == smp.maxcpus in our
> >>>> smp_parse function to do that, which we've never done before, so we may
> >>>> have trouble supporting existing command lines.
> >>> I am trying to recall, if the vcpu Hotplug is not supported then can they
> >>> ever be different?
> >>>
> >>> cpus =  (threads * cores * sockets)
> >>>
> >>> static void smp_parse(MachineState *ms, QemuOpts *opts)
> >>> {
> >>>       [...]
> >>>
> >>>          if (sockets * cores * threads != ms->smp.max_cpus) {
> >>>              warn_report("Invalid CPU topology deprecated: "
> >>>                          "sockets (%u) * cores (%u) * threads (%u) "
> >>>                          "!= maxcpus (%u)",
> >>>                          sockets, cores, threads,
> >>>                          ms->smp.max_cpus);
> >>>          }
> >>>       [...]
> >>> }
> >>>
> >>> Although, above check does not exit(1) and just warns on detecting invalid
> >>> CPU topology. Not sure why?
> >> Hmm, not sure what code you have there. I see this in
> >> hw/core/machine.c:smp_parse
> >>
> >>          if (ms->smp.max_cpus < cpus) {
> >>              error_report("maxcpus must be equal to or greater than smp");
> >>              exit(1);
> >>          }
> >>
> >>          if (sockets * cores * threads != ms->smp.max_cpus) {
> >>              error_report("Invalid CPU topology: "
> >>                           "sockets (%u) * cores (%u) * threads (%u) "
> >>                           "!= maxcpus (%u)",
> >>                           sockets, cores, threads,
> >>                           ms->smp.max_cpus);
> >>              exit(1);
> >>          }
> >>
> >>> Well if you think there are subtleties to support above implementation and
> >>> we cannot do it now then sure it is your call. :)
> Hi Salil, Drew,
> >> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
> >> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
> >> we're proposing that it should. Maybe we can start erroring out when
> >> cpus != maxcpus until hot plug is supported?
> > Agreed, both don't make any sense if hotplug is not supported and ideally should
> > fail with error. We should block any such topology configuration.
> In the ARM-specific function virt_smp_parse() (patch 9), there already
> have been some restrictions for the given -smp configuration.
> We now only allow:
> -smp N
> -smp maxcpus=M
> -smp N, maxcpus=M
> 
> -smp N, sockets=X, cores=Y
> -smp N, sockets=X, cores=Y, threads=Z
> 
> -smp maxcpus=M, sockets=X, cores=Y
> -smp maxcpus=M, sockets=X, cores=Y, threads=Z
> 
> -smp N, maxcpus=M, sockets=X, cores=Y
> -smp N, maxcpus=M, sockets=X, cores=Y, threads=Z
> 
> and disallow the other strange and rare formats that shouldn't be provided.
> 
> It's reasonable to block the topology configuration which is not useful
> currently. I will add the requirement for "cpus==maxcpus" in this function
> if the possible conflict with existing command lines is not a big problem.

Hi Yanan,
Makes sense. I did see your other patch-set in which cluster support has been
added. Are we deferring that too?

Thanks
Andrew Jones May 19, 2021, 8:15 a.m. UTC | #13
On Wed, May 19, 2021 at 07:54:37AM +0000, Salil Mehta wrote:
> > From: wangyanan (Y)
> > Sent: Wednesday, May 19, 2021 4:18 AM
> > 
> > 
> > On 2021/5/19 3:22, Salil Mehta wrote:
> > >> From: Andrew Jones [mailto:drjones@redhat.com]
> > >> Sent: Tuesday, May 18, 2021 8:06 PM
> > >> Subject: Re: [RFC PATCH v2 5/6] hw/arm/virt-acpi-build: Add PPTT table
> > >>
> > >> On Tue, May 18, 2021 at 06:34:08PM +0000, Salil Mehta wrote:
> > >>>> Those benefits, when vcpu pinning is used, are the same benefits
> > >>>> as for the host, which already use PPTT tables to describe topology, even
> > >>>> though hot plug isn't supported.
> > >>> yes sure, you mean pinning vcpus according to the cpu topology for performance?
> > >> Yup
> > > Already Agreed :)
> > >
> > >>>> Now, if you're saying we should only generate tables for smp.cpus, not
> > >>> Correct. This is what I thought we must be doing even now
> > >>>
> > >>>> smp.maxcpus, because hot plug isn't supported anyway, then I see your
> > >>>> point. But, it'd be better to require smp.cpus == smp.maxcpus in our
> > >>>> smp_parse function to do that, which we've never done before, so we may
> > >>>> have trouble supporting existing command lines.
> > >>> I am trying to recall, if the vcpu Hotplug is not supported then can they
> > >>> ever be different?
> > >>>
> > >>> cpus =  (threads * cores * sockets)
> > >>>
> > >>> static void smp_parse(MachineState *ms, QemuOpts *opts)
> > >>> {
> > >>>       [...]
> > >>>
> > >>>          if (sockets * cores * threads != ms->smp.max_cpus) {
> > >>>              warn_report("Invalid CPU topology deprecated: "
> > >>>                          "sockets (%u) * cores (%u) * threads (%u) "
> > >>>                          "!= maxcpus (%u)",
> > >>>                          sockets, cores, threads,
> > >>>                          ms->smp.max_cpus);
> > >>>          }
> > >>>       [...]
> > >>> }
> > >>>
> > >>> Although, above check does not exit(1) and just warns on detecting invalid
> > >>> CPU topology. Not sure why?
> > >> Hmm, not sure what code you have there. I see this in
> > >> hw/core/machine.c:smp_parse
> > >>
> > >>          if (ms->smp.max_cpus < cpus) {
> > >>              error_report("maxcpus must be equal to or greater than smp");
> > >>              exit(1);
> > >>          }
> > >>
> > >>          if (sockets * cores * threads != ms->smp.max_cpus) {
> > >>              error_report("Invalid CPU topology: "
> > >>                           "sockets (%u) * cores (%u) * threads (%u) "
> > >>                           "!= maxcpus (%u)",
> > >>                           sockets, cores, threads,
> > >>                           ms->smp.max_cpus);
> > >>              exit(1);
> > >>          }
> > >>
> > >>> Well if you think there are subtleties to support above implementation and
> > >>> we cannot do it now then sure it is your call. :)
> > Hi Salil, Drew,
> > >> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
> > >> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
> > >> we're proposing that it should. Maybe we can start erroring out when
> > >> cpus != maxcpus until hot plug is supported?
> > > Agreed, both don't make any sense if hotplug is not supported and ideally should
> > > fail with error. We should block any such topology configuration.
> > In the ARM-specific function virt_smp_parse() (patch 9), there already
> > have been some restrictions for the given -smp configuration.
> > We now only allow:
> > -smp N
> > -smp maxcpus=M
> > -smp N, maxcpus=M
> > 
> > -smp N, sockets=X, cores=Y
> > -smp N, sockets=X, cores=Y, threads=Z
> > 
> > -smp maxcpus=M, sockets=X, cores=Y
> > -smp maxcpus=M, sockets=X, cores=Y, threads=Z
> > 
> > -smp N, maxcpus=M, sockets=X, cores=Y
> > -smp N, maxcpus=M, sockets=X, cores=Y, threads=Z
> > 
> > and disallow the other strange and rare formats that shouldn't be provided.
> > 
> > It's reasonable to block the topology configuration which is not useful
> > currently. I will add the requirement for "cpus==maxcpus" in this function
> > if the possible conflict with existing command lines is not a big problem.
> 
> Hi Yanan,
> Makes sense. I did see your other patch-set in which cluster support has been
> added. Are we deferring that too?

The merge of that needs to be deferred, but for a different reason. It
shouldn't impact hot plug, because if hot plug doesn't like clusters,
then one could configure a topology which doesn't have clusters. But,
it can't be merged to QEMU until the kernel has merged its support.

Thanks,
drew
Andrew Jones May 19, 2021, 8:27 a.m. UTC | #14
On Tue, May 18, 2021 at 09:05:39PM +0200, Andrew Jones wrote:
> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
> we're proposing that it should. Maybe we can start erroring out when
> cpus != maxcpus until hot plug is supported?
>

The more I think about this, the more I think we're in a bit of a pickle
and need Peter Maydell to chime in. While we may want to make our -smp
command line option parsing more strict in order to bring some sanity to
it, if we do, then we'll break existing command lines which, while they
may specify useless inputs, have always gotten away with it. We probably
can't just change that now without forcing the user to opt into it.
Maybe we need to add another -smp parameter like 'strict' that has to
be set to 'on' in order to get this new behavior.

Peter, do you have some suggestions for this? A summary of the problem
we'd like to solve is as follows:

 We'd like to start describing CPU topology to guests when provided
 topology information with the '-smp ...' command line option. Currently,
 a user may provide nearly whatever it wants on that command line option
 and not get an error, even though the guest will not get a topology
 description. When building the topology it's important to know what
 the user actually wants, so we're proposing to require both sockets
 and cores be given if one of them is given. Also, since we don't yet
 support hot plug for AArch64, we're proposing to enforce cpus == maxcpus.

Is it fine to make those changes to the parsing for 6.1 and later? (Note,
mach-virt will override the default smp_parse with its own, so this is
mach-virt specific.) Or, should we only do this if a new parameter is
also given, e.g. 'strict'. Something like

  -smp strict=on,cpus=4,sockets=2,cores=2

would be needed by users who want to describe cpu topologies. Without
a strict description, they get what they get today for their DT/ACPI
topology description: nothing.
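
As a sketch only (the 'strict' option does not exist today, so its name
and the surrounding plumbing are hypothetical; it would also need adding
to the -smp QemuOptsList):

    bool strict = qemu_opt_get_bool(opts, "strict", false);

    if (strict) {
        /* Opt-in: require a complete topology description (sockets
         * and cores, at least), enforce cpus == maxcpus until hot
         * plug exists, and expose the topology via DT/ACPI. */
    } else {
        /* Legacy behaviour: accept today's loose inputs and generate
         * no topology description. */
    }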

Thanks,
drew
wangyanan (Y) May 19, 2021, 8:42 a.m. UTC | #15
On 2021/5/19 16:15, Andrew Jones wrote:
> On Wed, May 19, 2021 at 07:54:37AM +0000, Salil Mehta wrote:
>>> From: wangyanan (Y)
>>> Sent: Wednesday, May 19, 2021 4:18 AM
>>>
>>>
>>> On 2021/5/19 3:22, Salil Mehta wrote:
>>>>> From: Andrew Jones [mailto:drjones@redhat.com]
>>>>> Sent: Tuesday, May 18, 2021 8:06 PM
>>>>> Subject: Re: [RFC PATCH v2 5/6] hw/arm/virt-acpi-build: Add PPTT table
>>>>>
>>>>> On Tue, May 18, 2021 at 06:34:08PM +0000, Salil Mehta wrote:
>>>>>>> Those benefits, when vcpu pinning is used, are the same benefits
>>>>>>> as for the host, which already use PPTT tables to describe topology, even
>>>>>>> though hot plug isn't supported.
>>>>>> yes sure, you mean pinning vcpus according to the cpu topology for performance?
>>>>> Yup
>>>> Already Agreed :)
>>>>
>>>>>>> Now, if you're saying we should only generate tables for smp.cpus, not
>>>>>> Correct. This is what I thought we must be doing even now
>>>>>>
>>>>>>> smp.maxcpus, because hot plug isn't supported anyway, then I see your
>>>>>>> point. But, it'd be better to require smp.cpus == smp.maxcpus in our
>>>>>>> smp_parse function to do that, which we've never done before, so we may
>>>>>>> have trouble supporting existing command lines.
>>>>>> I am trying to recall, if the vcpu Hotplug is not supported then can they
>>>>>> ever be different?
>>>>>>
>>>>>> cpus =  (threads * cores * sockets)
>>>>>>
>>>>>> static void smp_parse(MachineState *ms, QemuOpts *opts)
>>>>>> {
>>>>>>        [...]
>>>>>>
>>>>>>           if (sockets * cores * threads != ms->smp.max_cpus) {
>>>>>>               warn_report("Invalid CPU topology deprecated: "
>>>>>>                           "sockets (%u) * cores (%u) * threads (%u) "
>>>>>>                           "!= maxcpus (%u)",
>>>>>>                           sockets, cores, threads,
>>>>>>                           ms->smp.max_cpus);
>>>>>>           }
>>>>>>        [...]
>>>>>> }
>>>>>>
>>>>>> Although, above check does not exit(1) and just warns on detecting invalid
>>>>>> CPU topology. Not sure why?
>>>>> Hmm, not sure what code you have there. I see this in
>>>>> hw/core/machine.c:smp_parse
>>>>>
>>>>>           if (ms->smp.max_cpus < cpus) {
>>>>>               error_report("maxcpus must be equal to or greater than smp");
>>>>>               exit(1);
>>>>>           }
>>>>>
>>>>>           if (sockets * cores * threads != ms->smp.max_cpus) {
>>>>>               error_report("Invalid CPU topology: "
>>>>>                            "sockets (%u) * cores (%u) * threads (%u) "
>>>>>                            "!= maxcpus (%u)",
>>>>>                            sockets, cores, threads,
>>>>>                            ms->smp.max_cpus);
>>>>>               exit(1);
>>>>>           }
>>>>>
>>>>>> Well if you think there are subtleties to support above implementation and
>>>>>> we cannot do it now then sure it is your call. :)
>>> Hi Salil, Drew,
>>>>> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
>>>>> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
>>>>> we're proposing that it should. Maybe we can start erroring out when
>>>>> cpus != maxcpus until hot plug is supported?
>>>> Agreed, both don't make any sense if hotplug is not supported and ideally should
>>>> fail with error. We should block any such topology configuration.
>>> In the ARM-specific function virt_smp_parse() (patch 9), there already
>>> have been some restrictions for the given -smp configuration.
>>> We now only allow:
>>> -smp N
>>> -smp maxcpus=M
>>> -smp N, maxcpus=M
>>>
>>> -smp N, sockets=X, cores=Y
>>> -smp N, sockets=X, cores=Y, threads=Z
>>>
>>> -smp maxcpus=M, sockets=X, cores=Y
>>> -smp maxcpus=M, sockets=X, cores=Y, threads=Z
>>>
>>> -smp N, maxcpus=M, sockets=X, cores=Y
>>> -smp N, maxcpus=M, sockets=X, cores=Y, threads=Z
>>>
>>> and disallow the other strange and rare formats that shouldn't be provided.
>>>
>>> It's reasonable to block the topology configuration which is not useful
>>> currently. I will add the requirement for "cpus==maxcpus" in this function
>>> if the possible conflict with existing command lines is not a big problem.
>> Hi Yanan,
>> Makes sense. I did see your other patch-set in which cluster support has been
>> added. Are we deferring that too?
> The merge of that needs to be deferred, but for a different reason. It
> shouldn't impact hot plug, because if hot plug doesn't like clusters,
> then one could configure a topology which doesn't have clusters. But,
> it can't be merged to QEMU until the kernel has merged its support.
Agreed!

Thanks,
Yanan
> Thanks,
> drew
>
> .
Salil Mehta May 19, 2021, 10 a.m. UTC | #16
> From: Andrew Jones [mailto:drjones@redhat.com]
> Sent: Wednesday, May 19, 2021 9:15 AM
> 
> On Wed, May 19, 2021 at 07:54:37AM +0000, Salil Mehta wrote:
> > > From: wangyanan (Y)
> > > Sent: Wednesday, May 19, 2021 4:18 AM
> > >
> > >
> > > On 2021/5/19 3:22, Salil Mehta wrote:
> > > >> From: Andrew Jones [mailto:drjones@redhat.com]
> > > >> Sent: Tuesday, May 18, 2021 8:06 PM
> > > >> Subject: Re: [RFC PATCH v2 5/6] hw/arm/virt-acpi-build: Add PPTT table
> > > >>
> > > >> On Tue, May 18, 2021 at 06:34:08PM +0000, Salil Mehta wrote:
> > > >>>> Those benefits, when vcpu pinning is used, are the same benefits
> > > >>>> as for the host, which already use PPTT tables to describe topology, even
> > > >>>> though hot plug isn't supported.
> > > >>> yes sure, you mean pinning vcpus according to the cpu topology for performance?
> > > >> Yup
> > > > Already Agreed :)
> > > >
> > > >>>> Now, if you're saying we should only generate tables for smp.cpus, not
> > > >>> Correct. This is what I thought we must be doing even now
> > > >>>
> > > >>>> smp.maxcpus, because hot plug isn't supported anyway, then I see your
> > > >>>> point. But, it'd be better to require smp.cpus == smp.maxcpus in our
> > > >>>> smp_parse function to do that, which we've never done before, so we may
> > > >>>> have trouble supporting existing command lines.
> > > >>> I am trying to recall, if the vcpu Hotplug is not supported then can they
> > > >>> ever be different?
> > > >>>
> > > >>> cpus =  (threads * cores * sockets)
> > > >>>
> > > >>> static void smp_parse(MachineState *ms, QemuOpts *opts)
> > > >>> {
> > > >>>       [...]
> > > >>>
> > > >>>          if (sockets * cores * threads != ms->smp.max_cpus) {
> > > >>>              warn_report("Invalid CPU topology deprecated: "
> > > >>>                          "sockets (%u) * cores (%u) * threads (%u) "
> > > >>>                          "!= maxcpus (%u)",
> > > >>>                          sockets, cores, threads,
> > > >>>                          ms->smp.max_cpus);
> > > >>>          }
> > > >>>       [...]
> > > >>> }
> > > >>>
> > > >>> Although, above check does not exit(1) and just warns on detecting invalid
> > > >>> CPU topology. Not sure why?
> > > >> Hmm, not sure what code you have there. I see this in
> > > >> hw/core/machine.c:smp_parse
> > > >>
> > > >>          if (ms->smp.max_cpus < cpus) {
> > > >>              error_report("maxcpus must be equal to or greater than smp");
> > > >>              exit(1);
> > > >>          }
> > > >>
> > > >>          if (sockets * cores * threads != ms->smp.max_cpus) {
> > > >>              error_report("Invalid CPU topology: "
> > > >>                           "sockets (%u) * cores (%u) * threads (%u) "
> > > >>                           "!= maxcpus (%u)",
> > > >>                           sockets, cores, threads,
> > > >>                           ms->smp.max_cpus);
> > > >>              exit(1);
> > > >>          }
> > > >>
> > > >>> Well if you think there are subtleties to support above implementation and
> > > >>> we cannot do it now then sure it is your call. :)
> > > Hi Salil, Drew,
> > > >> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
> > > >> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
> > > >> we're proposing that it should. Maybe we can start erroring out when
> > > >> cpus != maxcpus until hot plug is supported?
> > > > Agreed, both don't make any sense if hotplug is not supported and ideally should
> > > > fail with error. We should block any such topology configuration.
> > > In the ARM-specific function virt_smp_parse() (patch 9), there already
> > > have been some restrictions for the given -smp configuration.
> > > We now only allow:
> > > -smp N
> > > -smp maxcpus=M
> > > -smp N, maxcpus=M
> > >
> > > -smp N, sockets=X, cores=Y
> > > -smp N, sockets=X, cores=Y, threads=Z
> > >
> > > -smp maxcpus=M, sockets=X, cores=Y
> > > -smp maxcpus=M, sockets=X, cores=Y, threads=Z
> > >
> > > -smp N, maxcpus=M, sockets=X, cores=Y
> > > -smp N, maxcpus=M, sockets=X, cores=Y, threads=Z
> > >
> > > and disallow the other strange and rare formats that shouldn't be provided.
> > >
> > > It's reasonable to block the topology configuration which is not useful
> > > currently. I will add the requirement for "cpus==maxcpus" in this function
> > > if the possible conflict with existing command lines is not a big problem.
> >
> > Hi Yanan,
> > Makes sense. I did see your other patch-set in which cluster support has been
> > added. Are we deferring that too?
> 
> The merge of that needs to be deferred, but for a different reason. It
> shouldn't impact hot plug, because if hot plug doesn't like clusters,
> then one could configure a topology which doesn't have clusters. But,

yes, agreed.

> it can't be merged to QEMU until the kernel has merged its support.

sure.
wangyanan (Y) May 19, 2021, 1:26 p.m. UTC | #17
Hi Drew,

On 2021/5/19 16:27, Andrew Jones wrote:
> On Tue, May 18, 2021 at 09:05:39PM +0200, Andrew Jones wrote:
>> The problem is that -smp 4,maxcpus=8 doesn't error out today, even though
>> it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out either, but
>> we're proposing that it should. Maybe we can start erroring out when
>> cpus != maxcpus until hot plug is supported?
>>
> The more I think about this, the more I think we're in a bit of a
> pickle and need Peter Maydell to chime in. While we may want to make
> our -smp command line option parsing more strict in order to bring some
> sanity to it, if we do, then we'll break existing command lines which,
> while they may specify useless inputs, have always gotten away with it.
> We probably can't just change that now without forcing the user to opt
> into it. Maybe we need to add another -smp parameter like 'strict' that
> has to be set to 'on' in order to get this new behavior.
>
> Peter, do you have some suggestions for this? A summary of the problem
> we'd like to solve is as follows:
>
>   We'd like to start describing CPU topology to guests when provided
>   topology information with the '-smp ...' command line option. Currently,
>   a user may provide nearly whatever it wants on that command line option
>   and not get an error, even though the guest will not get a topology
>   description. When building the topology it's important to know what
>   the user actually wants, so we're proposing to require both sockets
>   and cores be given if one of them is given. Also, since we don't yet
>   support hot plug for AArch64, we're proposing to enforce cpus == maxcpus.
>
> Is it fine to make those changes to the parsing for 6.1 and later? (Note,
> mach-virt will override the default smp_parse with its own, so this is
> mach-virt specific.) Or, should we only do this if a new parameter is
> also given, e.g. 'strict'. Something like
>
>    -smp strict=on,cpus=4,sockets=2,cores=2
>
> would be needed by users who want to describe cpu topologies. Without
> a strict description, they get what they get today for their DT/ACPI
> topology description: nothing.
From my point of view, I like the idea of a new parameter like
"strict=on/off". I will explain the reasons below, but maybe I have
missed something, so I also hope for some suggestions from Peter. :)

1) We don't need to worry about breaking any existing -smp command
lines, including the rare and strange ones, any more, since we will only
apply the more strict requirements to newly provided cmdlines with
"strict=on", and only generate a topology description to the guest for
those new cmdlines.

2) This will provide an option for users to decide whether to enable
the feature or not. Furthermore, this feature can also work on older
machine types: if a user wants to make use of cpu topology exposure to
the guest on older machines, and is also sure it won't affect the
application's behavior, then he can read the Doc and properly provide a
-smp cmdline with "strict=on" to boot a VM.

3) We don't need to bother guessing different formats of -smp command
lines in parsing. If the new parameter is not specified or "strict=off"
is provided, we totally follow the rules in smp_parse() and disable the
topology exposure. And if "strict=on" is provided, we enable the
topology exposure and enforce a completely detailed configuration like
"-smp strict=on,cpus=4,sockets=2,cores=2".

But maxcpus will be optional; it will default to cpus if not provided.
We also ensure it matches cpus if provided, given that cpu hotplug is
not available yet.

Thanks,
Yanan
> Thanks,
> drew
>
> .
wangyanan (Y) May 19, 2021, 1:40 p.m. UTC | #18
On 2021/5/19 21:26, wangyanan (Y) wrote:
> Hi Drew,
>
> On 2021/5/19 16:27, Andrew Jones wrote:
>> On Tue, May 18, 2021 at 09:05:39PM +0200, Andrew Jones wrote:
>>> The problem is that -smp 4,maxcpus=8 doesn't error out today, even
>>> though it doesn't do anything. OTOH, -smp 4,cores=2 doesn't error out
>>> either, but we're proposing that it should. Maybe we can start
>>> erroring out when cpus != maxcpus until hot plug is supported?
>>>
>> The more I think about this, the more I think we're in a bit of a
>> pickle and need Peter Maydell to chime in. While we may want to make
>> our -smp command line option parsing more strict in order to bring
>> some sanity to it, if we do, then we'll break existing command lines
>> which, while they may specify useless inputs, have always gotten away
>> with it. We probably can't just change that now without forcing the
>> user to opt into it. Maybe we need to add another -smp parameter like
>> 'strict' that has to be set to 'on' in order to get this new behavior.
>>
>> Peter, do you have some suggestions for this? A summary of the problem
>> we'd like to solve is as follows:
>>
>>   We'd like to start describing CPU topology to guests when provided
>>   topology information with the '-smp ...' command line option.
>>   Currently, a user may provide nearly whatever it wants on that
>>   command line option and not get an error, even though the guest will
>>   not get a topology description. When building the topology it's
>>   important to know what the user actually wants, so we're proposing
>>   to require both sockets and cores be given if one of them is given.
>>   Also, since we don't yet support hot plug for AArch64, we're
>>   proposing to enforce cpus == maxcpus.
>>
>> Is it fine to make those changes to the parsing for 6.1 and later?
>> (Note, mach-virt will override the default smp_parse with its own, so
>> this is mach-virt specific.) Or, should we only do this if a new
>> parameter is also given, e.g. 'strict'. Something like
>>
>>    -smp strict=on,cpus=4,sockets=2,cores=2
>>
>> would be needed by users who want to describe cpu topologies. Without
>> a strict description, they get what they get today for their DT/ACPI
>> topology description: nothing.
> From my point of view, I like the idea of a new parameter like
> "strict=on/off". I will explain the reasons below, but maybe I have
> missed something, so I also hope for some suggestions from Peter. :)
>
> 1) We don't need to worry about breaking any existing -smp command
> lines, including the rare and strange ones, any more, since we will
> only apply the more strict requirements to newly provided cmdlines with
> "strict=on", and only generate a topology description to the guest for
> those new cmdlines.
>
> 2) This will provide an option for users to decide whether to enable
> the feature or not. Furthermore, this feature can also work on older
> machine types: if a user wants to make use of cpu topology exposure to
> the guest on older machines, and is also sure it won't affect the
> application's behavior, then he can read the Doc and properly provide a
> -smp cmdline with "strict=on" to boot a VM.
>
> 3) We don't need to bother guessing different formats of -smp command
> lines in parsing. If the new parameter is not specified or "strict=off"
> is provided, we totally follow the rules in smp_parse() and disable the
> topology exposure. And if "strict=on" is provided, we enable the
> topology exposure and enforce a completely detailed configuration like
> "-smp strict=on,cpus=4,sockets=2,cores=2".
IMO, threads should also be required here.
Libvirt requires all of them if one of sockets/cores/threads is provided.
So if we hope to be consistent with Libvirt, the required configuration
should be at least "-smp strict=on,cpus=4,sockets=2,cores=2,threads=1".
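
Under that rule, for example (illustrative):

    accepted:  -smp strict=on,cpus=4,sockets=2,cores=2,threads=1
    rejected:  -smp strict=on,cpus=4,sockets=2,cores=2    (no threads)
    rejected:  -smp strict=on,cpus=4,sockets=2            (no cores)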

Thanks,
Yanan
>
> But maxcpus will be optional; it will default to cpus if not provided.
> We also ensure it matches cpus if provided, given that cpu hotplug is
> not available yet.
>
> Thanks,
> Yanan
>> Thanks,
>> drew
>>
>> .
diff mbox series

Patch

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 2ad5dad1bf..03fd812d5a 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -436,6 +436,64 @@  build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
                  vms->oem_table_id);
 }
 
+/* PPTT */
+static void
+build_pptt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
+{
+    int pptt_start = table_data->len;
+    int uid = 0, cpus = 0, socket = 0;
+    MachineState *ms = MACHINE(vms);
+    unsigned int smp_cores = ms->smp.cores;
+    unsigned int smp_threads = ms->smp.threads;
+
+    acpi_data_push(table_data, sizeof(AcpiTableHeader));
+
+    for (socket = 0; cpus < ms->possible_cpus->len; socket++) {
+        uint32_t socket_offset = table_data->len - pptt_start;
+        int core;
+
+        build_processor_hierarchy_node(
+            table_data, 1, /* Physical package */
+            0, socket, /* No parent */
+            NULL, 0);  /* No private resources */
+
+        for (core = 0; core < smp_cores; core++) {
+            uint32_t core_offset = table_data->len - pptt_start;
+            int thread;
+
+            if (smp_threads <= 1) {
+                build_processor_hierarchy_node(
+                    table_data,
+                    (1 << 1) | /* ACPI Processor ID valid */
+                    (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
+                    socket_offset, uid++, /* Parent is a Socket */
+                    NULL, 0);  /* No private resources */
+            } else {
+                build_processor_hierarchy_node(
+                    table_data, 0,
+                    socket_offset, core, /* Parent is a Socket */
+                    NULL, 0); /* No private resources */
+
+                for (thread = 0; thread < smp_threads; thread++) {
+                    build_processor_hierarchy_node(
+                        table_data,
+                        (1 << 1) | /* ACPI Processor ID valid */
+                        (1 << 2) | /* ACPI 6.3 - Processor is a Thread */
+                        (1 << 3),  /* ACPI 6.3 - Node is a Leaf */
+                        core_offset, uid++, /* Parent is a Core */
+                        NULL, 0);  /* No private resources */
+                }
+            }
+        }
+        cpus += smp_cores * smp_threads;
+    }
+
+    build_header(linker, table_data,
+                 (void *)(table_data->data + pptt_start), "PPTT",
+                 table_data->len - pptt_start, 2,
+                 vms->oem_id, vms->oem_table_id);
+}
+
 /* GTDT */
 static void
 build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
@@ -707,6 +765,11 @@  void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
     acpi_add_table(table_offsets, tables_blob);
     build_madt(tables_blob, tables->linker, vms);
 
+    if (ms->smp.cpus > 1 && !vmc->no_cpu_topology) {
+        acpi_add_table(table_offsets, tables_blob);
+        build_pptt(tables_blob, tables->linker, vms);
+    }
+
     acpi_add_table(table_offsets, tables_blob);
     build_gtdt(tables_blob, tables->linker, vms);
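
For reference, a Linux guest derives its package/core IDs from this
table; they can be inspected via sysfs once booted (standard paths; the
values depend on the configuration and, as noted in the commit message,
on the PPTT table offsets rather than on zero-based counters):

    $ cat /sys/devices/system/cpu/cpu0/topology/physical_package_id
    $ cat /sys/devices/system/cpu/cpu0/topology/core_id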