diff mbox series

target/arm: fix MPIDR value for ARM CPUs with SMT

Message ID 20240419183135.12276-1-dorjoychy111@gmail.com
State New
Headers show
Series target/arm: fix MPIDR value for ARM CPUs with SMT | expand

Commit Message

Dorjoy Chowdhury April 19, 2024, 6:31 p.m. UTC
Some ARM CPUs advertise themselves as SMT by having the MT[24] bit set
to 1 in the MPIDR register. These CPUs have the thread id in Aff0[7:0]
bits, CPU id in Aff1[15:8] bits and cluster id in Aff2[23:16] bits in
MPIDR.

On the other hand, ARM CPUs without SMT have the MT[24] bit set to 0,
CPU id in Aff0[7:0] bits and cluster id in Aff1[15:8] bits in MPIDR.

The mpidr_read_val() function always reported non-SMT i.e., MT=0 style
MPIDR value which means it was wrong for the following CPUs with SMT
supported by QEMU:
    - cortex-a55
    - cortex-a76
    - cortex-a710
    - neoverse-v1
    - neoverse-n1
    - neoverse-n2

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1608
Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com>
---
 hw/arm/npcm7xx.c       |  2 +-
 hw/arm/sbsa-ref.c      | 21 ++++++++++++++++-----
 hw/arm/virt.c          | 18 +++++++++++++++---
 target/arm/cpu.c       | 14 ++++++++++++--
 target/arm/cpu.h       |  5 ++++-
 target/arm/helper.c    |  4 ++++
 target/arm/tcg/cpu64.c | 12 ++++++++++++
 7 files changed, 64 insertions(+), 12 deletions(-)

Comments

Richard Henderson April 21, 2024, 5:40 a.m. UTC | #1
On 4/19/24 11:31, Dorjoy Chowdhury wrote:
> +
> +    /*
> +     * Instantiate a temporary CPU object to build mp_affinity
> +     * of the possible CPUs.
> +     */
> +    cpuobj = object_new(ms->cpu_type);
> +    armcpu = ARM_CPU(cpuobj);
> +
>       for (n = 0; n < ms->possible_cpus->len; n++) {
>           ms->possible_cpus->cpus[n].type = ms->cpu_type;
>           ms->possible_cpus->cpus[n].arch_id =
> -            sbsa_ref_cpu_mp_affinity(sms, n);
> +            sbsa_ref_cpu_mp_affinity(armcpu, n);
>           ms->possible_cpus->cpus[n].props.has_thread_id = true;
>           ms->possible_cpus->cpus[n].props.thread_id = n;
>       }
> +
> +    object_unref(cpuobj);

Why is this instantiation necessary?

> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -1314,8 +1314,18 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
>       }
>   }
>   
> -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
> +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
>   {
> +    if (cpu->has_smt) {
> +        /*
> +         * Right now, the ARM CPUs with SMT supported by QEMU only have
> +         * one thread per core. So Aff0 is always 0.
> +         */

Well, this isn't true.

     -smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]
                    [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]

I would expect all of Aff[0-3] to be settable with the proper topology parameters.


r~
Dorjoy Chowdhury April 21, 2024, 8:40 a.m. UTC | #2
On Sun, Apr 21, 2024 at 11:40 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 4/19/24 11:31, Dorjoy Chowdhury wrote:
> > +
> > +    /*
> > +     * Instantiate a temporary CPU object to build mp_affinity
> > +     * of the possible CPUs.
> > +     */
> > +    cpuobj = object_new(ms->cpu_type);
> > +    armcpu = ARM_CPU(cpuobj);
> > +
> >       for (n = 0; n < ms->possible_cpus->len; n++) {
> >           ms->possible_cpus->cpus[n].type = ms->cpu_type;
> >           ms->possible_cpus->cpus[n].arch_id =
> > -            sbsa_ref_cpu_mp_affinity(sms, n);
> > +            sbsa_ref_cpu_mp_affinity(armcpu, n);
> >           ms->possible_cpus->cpus[n].props.has_thread_id = true;
> >           ms->possible_cpus->cpus[n].props.thread_id = n;
> >       }
> > +
> > +    object_unref(cpuobj);
>
> Why is this instantiation necessary?
>

The instantiation is necessary (like the one in hw/arm/virt.c in
virt_possible_cpu_arch_ids) because the
"sbsa_ref_possible_cpu_arch_ids" function is called (via
possible_cpu_arch_ids) before the CPUs are created in the
"sbsa_ref_init" function. There was some related discussion here
(qemu-devel archive URL):
https://mail.gnu.org/archive/html/qemu-devel/2024-04/msg02074.html . I
picked this up from the same thing done in hw/arm/virt.c in the
"machvirt_init" function in the "if (!vms->memap) {" block.

> > --- a/target/arm/cpu.c
> > +++ b/target/arm/cpu.c
> > @@ -1314,8 +1314,18 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
> >       }
> >   }
> >
> > -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
> > +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
> >   {
> > +    if (cpu->has_smt) {
> > +        /*
> > +         * Right now, the ARM CPUs with SMT supported by QEMU only have
> > +         * one thread per core. So Aff0 is always 0.
> > +         */
>
> Well, this isn't true.
>
>      -smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]
>                     [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]
>
> I would expect all of Aff[0-3] to be settable with the proper topology parameters.
>

Sorry, maybe I misunderstood. From the gitlab bug ticket (URL:
https://gitlab.com/qemu-project/qemu/-/issues/1608) and the discussion
in qemu-devel (URL:
https://mail.gnu.org/archive/html/qemu-devel/2024-04/msg01964.html) I
looked at the technical reference manuals of the MPIDR register of the
a-55, a-76, a-710, neoverse-v1, neoverse-n1, neoverse-n2 CPUs and all
of them had MT=1 and one thread per core so Aff0 is always 0.

I don't know what Aff3 should be set to. I see this comment in the
target/arm/cpu.c "arm_cpu_realizefn" function
   /* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will
override it.│
     * We don't support setting cluster ID ([16..23]) (known as Aff2
         │
     * in later ARM ARM versions), or any of the higher affinity level
fields,  │
     * so these bits always RAZ.
         │
     */
Right now we don't seem to set Aff3[39:32] at all in the mp_affinity
property. Let me know what should be the expected behavior here as I
don't have enough knowledge here. I will try to fix it. Thanks!

Regards,
Dorjoy
Peter Maydell April 22, 2024, 10:46 a.m. UTC | #3
On Sun, 21 Apr 2024 at 06:40, Richard Henderson
<richard.henderson@linaro.org> wrote:
> > --- a/target/arm/cpu.c
> > +++ b/target/arm/cpu.c
> > @@ -1314,8 +1314,18 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
> >       }
> >   }
> >
> > -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
> > +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
> >   {
> > +    if (cpu->has_smt) {
> > +        /*
> > +         * Right now, the ARM CPUs with SMT supported by QEMU only have
> > +         * one thread per core. So Aff0 is always 0.
> > +         */
>
> Well, this isn't true.
>
>      -smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]
>                     [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]
>
> I would expect all of Aff[0-3] to be settable with the proper topology parameters.

As I understand it the MPIDR value is more or less independent
of the topology information as presented to the guest OS.
The options to the -smp command set the firmware topology
information, which doesn't/shouldn't affect the reported
MPIDR values, and in particular shouldn't change whether
the CPU selected has the MT bit set or not.

For Arm's CPUs they fall into two categories:
 * older ones don't set MT in their MPIDR, and the Aff0
   field is effectively the CPU number
 * newer ones do set MT in their MPIDR, but don't have
   SMT, so their Aff0 is always 0 and their Aff1
   is the CPU number

Of all the CPUs we model, none of them are the
architecturally-permitted "MT is set, CPU implements
actual SMT, Aff0 indicates the thread in the CPU" type.

For TCG all the topology info is pretty irrelevant anyway:
if the user wants to tell the guest OS that it should
do NUMA partitioning (assuming threads or otherwise),
that's up to them.

thanks
-- PMM
Peter Maydell April 22, 2024, 11:26 a.m. UTC | #4
On Mon, 22 Apr 2024 at 11:46, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Sun, 21 Apr 2024 at 06:40, Richard Henderson
> <richard.henderson@linaro.org> wrote:
> > > --- a/target/arm/cpu.c
> > > +++ b/target/arm/cpu.c
> > > @@ -1314,8 +1314,18 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
> > >       }
> > >   }
> > >
> > > -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
> > > +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
> > >   {
> > > +    if (cpu->has_smt) {
> > > +        /*
> > > +         * Right now, the ARM CPUs with SMT supported by QEMU only have
> > > +         * one thread per core. So Aff0 is always 0.
> > > +         */
> >
> > Well, this isn't true.
> >
> >      -smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]
> >                     [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]
> >
> > I would expect all of Aff[0-3] to be settable with the proper topology parameters.
>
> As I understand it the MPIDR value is more or less independent
> of the topology information as presented to the guest OS.
> The options to the -smp command set the firmware topology
> information, which doesn't/shouldn't affect the reported
> MPIDR values, and in particular shouldn't change whether
> the CPU selected has the MT bit set or not.
>
> For Arm's CPUs they fall into two categories:
>  * older ones don't set MT in their MPIDR, and the Aff0
>    field is effectively the CPU number
>  * newer ones do set MT in their MPIDR, but don't have
>    SMT, so their Aff0 is always 0 and their Aff1
>    is the CPU number
>
> Of all the CPUs we model, none of them are the
> architecturally-permitted "MT is set, CPU implements
> actual SMT, Aff0 indicates the thread in the CPU" type.

Looking at the TRM, Neoverse-E1 is "MT is set, actual SMT,
Aff0 is the thread" (Aff0 can be 0 or 1). We just don't
model that CPU type yet. But we should probably make
sure we don't block ourselves into a corner where that
would be awkward -- I'll have a think about this and
look at what x86 does with the topology info.

thanks
-- PMM
Richard Henderson April 22, 2024, 3:21 p.m. UTC | #5
On 4/22/24 04:26, Peter Maydell wrote:
> On Mon, 22 Apr 2024 at 11:46, Peter Maydell <peter.maydell@linaro.org> wrote:
>>
>> On Sun, 21 Apr 2024 at 06:40, Richard Henderson
>> <richard.henderson@linaro.org> wrote:
>>>> --- a/target/arm/cpu.c
>>>> +++ b/target/arm/cpu.c
>>>> @@ -1314,8 +1314,18 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
>>>>        }
>>>>    }
>>>>
>>>> -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
>>>> +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
>>>>    {
>>>> +    if (cpu->has_smt) {
>>>> +        /*
>>>> +         * Right now, the ARM CPUs with SMT supported by QEMU only have
>>>> +         * one thread per core. So Aff0 is always 0.
>>>> +         */
>>>
>>> Well, this isn't true.
>>>
>>>       -smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]
>>>                      [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]
>>>
>>> I would expect all of Aff[0-3] to be settable with the proper topology parameters.
>>
>> As I understand it the MPIDR value is more or less independent
>> of the topology information as presented to the guest OS.
>> The options to the -smp command set the firmware topology
>> information, which doesn't/shouldn't affect the reported
>> MPIDR values, and in particular shouldn't change whether
>> the CPU selected has the MT bit set or not.
>>
>> For Arm's CPUs they fall into two categories:
>>   * older ones don't set MT in their MPIDR, and the Aff0
>>     field is effectively the CPU number
>>   * newer ones do set MT in their MPIDR, but don't have
>>     SMT, so their Aff0 is always 0 and their Aff1
>>     is the CPU number
>>
>> Of all the CPUs we model, none of them are the
>> architecturally-permitted "MT is set, CPU implements
>> actual SMT, Aff0 indicates the thread in the CPU" type.
> 
> Looking at the TRM, Neoverse-E1 is "MT is set, actual SMT,
> Aff0 is the thread" (Aff0 can be 0 or 1). We just don't
> model that CPU type yet. But we should probably make
> sure we don't block ourselves into a corner where that
> would be awkward -- I'll have a think about this and
> look at what x86 does with the topology info.

I'm suggesting that we set things up per -smp, and if the user chooses a -cpu value for 
which that topology doesn't make sense, we do it anyway and let them keep both pieces.


r~
Richard Henderson April 22, 2024, 3:24 p.m. UTC | #6
On 4/22/24 08:21, Richard Henderson wrote:
>>> For Arm's CPUs they fall into two categories:
>>>   * older ones don't set MT in their MPIDR, and the Aff0
>>>     field is effectively the CPU number
>>>   * newer ones do set MT in their MPIDR, but don't have
>>>     SMT, so their Aff0 is always 0 and their Aff1
>>>     is the CPU number
>>>
>>> Of all the CPUs we model, none of them are the
>>> architecturally-permitted "MT is set, CPU implements
>>> actual SMT, Aff0 indicates the thread in the CPU" type.
>>
>> Looking at the TRM, Neoverse-E1 is "MT is set, actual SMT,
>> Aff0 is the thread" (Aff0 can be 0 or 1). We just don't
>> model that CPU type yet. But we should probably make
>> sure we don't block ourselves into a corner where that
>> would be awkward -- I'll have a think about this and
>> look at what x86 does with the topology info.
> 
> I'm suggesting that we set things up per -smp, and if the user chooses a -cpu value for 
> which that topology doesn't make sense, we do it anyway and let them keep both pieces.

... but more practically, it allows experimentation at -cpu max, without having to build 
in anything cpu-specific.  Good to know about the E1 though...


r~
Dorjoy Chowdhury April 25, 2024, 4:46 p.m. UTC | #7
On Mon, Apr 22, 2024 at 5:26 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Mon, 22 Apr 2024 at 11:46, Peter Maydell <peter.maydell@linaro.org> wrote:
> >
> > On Sun, 21 Apr 2024 at 06:40, Richard Henderson
> > <richard.henderson@linaro.org> wrote:
> > > > --- a/target/arm/cpu.c
> > > > +++ b/target/arm/cpu.c
> > > > @@ -1314,8 +1314,18 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
> > > >       }
> > > >   }
> > > >
> > > > -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
> > > > +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
> > > >   {
> > > > +    if (cpu->has_smt) {
> > > > +        /*
> > > > +         * Right now, the ARM CPUs with SMT supported by QEMU only have
> > > > +         * one thread per core. So Aff0 is always 0.
> > > > +         */
> > >
> > > Well, this isn't true.
> > >
> > >      -smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]
> > >                     [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]
> > >
> > > I would expect all of Aff[0-3] to be settable with the proper topology parameters.
> >
> > As I understand it the MPIDR value is more or less independent
> > of the topology information as presented to the guest OS.
> > The options to the -smp command set the firmware topology
> > information, which doesn't/shouldn't affect the reported
> > MPIDR values, and in particular shouldn't change whether
> > the CPU selected has the MT bit set or not.
> >
> > For Arm's CPUs they fall into two categories:
> >  * older ones don't set MT in their MPIDR, and the Aff0
> >    field is effectively the CPU number
> >  * newer ones do set MT in their MPIDR, but don't have
> >    SMT, so their Aff0 is always 0 and their Aff1
> >    is the CPU number
> >
> > Of all the CPUs we model, none of them are the
> > architecturally-permitted "MT is set, CPU implements
> > actual SMT, Aff0 indicates the thread in the CPU" type.
>
> Looking at the TRM, Neoverse-E1 is "MT is set, actual SMT,
> Aff0 is the thread" (Aff0 can be 0 or 1). We just don't
> model that CPU type yet. But we should probably make
> sure we don't block ourselves into a corner where that
> would be awkward -- I'll have a think about this and
> look at what x86 does with the topology info.
>

Thanks. Let me know what should be done as part of this patch to
properly fix the issue if you get a chance to look at it.

Regards,
Dorjoy
Marcin Juszkiewicz May 1, 2024, 6:08 p.m. UTC | #8
W dniu 22.04.2024 o 17:21, Richard Henderson pisze:
>>> For Arm's CPUs they fall into two categories:
>>>   * older ones don't set MT in their MPIDR, and the Aff0
>>>     field is effectively the CPU number
>>>   * newer ones do set MT in their MPIDR, but don't have
>>>     SMT, so their Aff0 is always 0 and their Aff1
>>>     is the CPU number
>>>
>>> Of all the CPUs we model, none of them are the
>>> architecturally-permitted "MT is set, CPU implements
>>> actual SMT, Aff0 indicates the thread in the CPU" type.
>>
>> Looking at the TRM, Neoverse-E1 is "MT is set, actual SMT,
>> Aff0 is the thread" (Aff0 can be 0 or 1). We just don't
>> model that CPU type yet. But we should probably make
>> sure we don't block ourselves into a corner where that
>> would be awkward -- I'll have a think about this and
>> look at what x86 does with the topology info.
> 
> I'm suggesting that we set things up per -smp, and if the user chooses a 
> -cpu value for which that topology doesn't make sense, we do it anyway 
> and let them keep both pieces.

Aff[0-3] are 8 bit each. On those cpus where they exist.

So "-smp 512" (maximum allowed for sbsa-ref) would need to be split to 2 
clusters by 256 cores or 64 clusters of 8 cores each like it is today so 
it is backward compatible with whatever assumption firmware/OS does.

But if we go for 'newer, better MPIDR_EL1' then maybe it is time to set 
U bit [30] if "-smp X,sockets=Y" where Y > 1? Or when NUMA config with 
multiple cpu nodes are setup.

Also a way to know which AffX fields to check on firmware/OS side would 
be nice. A57/72 use Aff[1-2], N1+ use Aff[0-3]. Sure, it can be checked 
by going through cores, reading then MPIDR_EL1 and if 7:0 has same value 
on all of them then check Aff[1-3], otherwise Aff[1-2].
Peter Maydell May 2, 2024, 9:11 a.m. UTC | #9
On Wed, 1 May 2024 at 19:08, Marcin Juszkiewicz
<marcin.juszkiewicz@linaro.org> wrote:
>
> W dniu 22.04.2024 o 17:21, Richard Henderson pisze:
> >>> For Arm's CPUs they fall into two categories:
> >>>   * older ones don't set MT in their MPIDR, and the Aff0
> >>>     field is effectively the CPU number
> >>>   * newer ones do set MT in their MPIDR, but don't have
> >>>     SMT, so their Aff0 is always 0 and their Aff1
> >>>     is the CPU number
> >>>
> >>> Of all the CPUs we model, none of them are the
> >>> architecturally-permitted "MT is set, CPU implements
> >>> actual SMT, Aff0 indicates the thread in the CPU" type.
> >>
> >> Looking at the TRM, Neoverse-E1 is "MT is set, actual SMT,
> >> Aff0 is the thread" (Aff0 can be 0 or 1). We just don't
> >> model that CPU type yet. But we should probably make
> >> sure we don't block ourselves into a corner where that
> >> would be awkward -- I'll have a think about this and
> >> look at what x86 does with the topology info.
> >
> > I'm suggesting that we set things up per -smp, and if the user chooses a
> > -cpu value for which that topology doesn't make sense, we do it anyway
> > and let them keep both pieces.
>
> Aff[0-3] are 8 bit each. On those cpus where they exist.
>
> So "-smp 512" (maximum allowed for sbsa-ref) would need to be split to 2
> clusters by 256 cores or 64 clusters of 8 cores each like it is today so
> it is backward compatible with whatever assumption firmware/OS does.
>
> But if we go for 'newer, better MPIDR_EL1' then maybe it is time to set
> U bit [30] if "-smp X,sockets=Y" where Y > 1? Or when NUMA config with
> multiple cpu nodes are setup.

The U bit is a red herring here -- it indicates a property of the
implementation, not of the topology. It is an almost obsolete
bit that might have been useful on 32-bit cores (eg the A9 had
a UP version and an MP version; we only implement the MP version,
and the Cortex-R5 sets the U bit). I think any modern CPU will have
U=0, whether it happens to be the only CPU in the system or not.

> Also a way to know which AffX fields to check on firmware/OS side would
> be nice. A57/72 use Aff[1-2], N1+ use Aff[0-3]. Sure, it can be checked
> by going through cores, reading then MPIDR_EL1 and if 7:0 has same value
> on all of them then check Aff[1-3], otherwise Aff[1-2].

I think the answer here is "guest software doesn't rely on the Aff
fields to indicate topology". See eg this patch from back in 2020
to the kernel:
https://patchwork.kernel.org/project/linux-arm-kernel/patch/20200829130016.26106-1-valentin.schneider@arm.com/
(now kernel commit 3102bc0e6ac752cc5) which notes that the amount
of implementation leniency in the Aff* definitions and some of
the odd stuff that has been shipped in practice means that you
can't get useful topology information out of the MPIDR (which is
why we provide it in the ACPI/dtb).

On the QEMU side I guess we should strive to set up the MPIDR
fields to something plausibly matching the topology as defined
by the user on the command line. Unanswered questions:

 * I guess we need some kind of back-compat thing where for
   old machine types we continue to report the old MPIDR
 * what are the constraints on the Aff* fields (eg that kernel
   commit suggests Aff0 shouldn't be > 15)?
 * should user attempts to eg ask for a topology with threads > 1
   but a CPU type which isn't multithreaded be an error?
   (What is the existing QEMU x86 practice here?)

thanks
-- PMM
Peter Maydell May 2, 2024, 10:37 a.m. UTC | #10
On Thu, 2 May 2024 at 10:11, Peter Maydell <peter.maydell@linaro.org> wrote:
> On the QEMU side I guess we should strive to set up the MPIDR
> fields to something plausibly matching the topology as defined
> by the user on the command line. Unanswered questions:
>
>  * I guess we need some kind of back-compat thing where for
>    old machine types we continue to report the old MPIDR
>  * what are the constraints on the Aff* fields (eg that kernel
>    commit suggests Aff0 shouldn't be > 15)?

This one is apparently related to GICv3 -- if the GIC doesn't
implement RangeSelector support in ICC_SGI0R_EL1 and other
places (advertised via GICD_TYPER.RSS and ICC_CTLR_EL1.SS) then
there's no way to send an SGI to a CPU whose Aff0 is outside
[0..15], and so you shouldn't build a system with Aff0 > 15.
QEMU's GICv3 doesn't implement the RSS functionality (though it
wouldn't be hard to add if we really cared), so we should also
keep Aff0 in [0..15].

We have ARM_DEFAULT_CPUS_PER_CLUSTER = 8, which does keep us
in that range. I don't think there's really a good reason for
it to be 8 rather than 16: this might be legacy from GICv2?

-- PMM
Marcin Juszkiewicz May 2, 2024, 10:56 a.m. UTC | #11
W dniu 2.05.2024 o 12:37, Peter Maydell pisze:
>>   * what are the constraints on the Aff* fields (eg that kernel
>>     commit suggests Aff0 shouldn't be > 15)?

> This one is apparently related to GICv3 -- if the GIC doesn't
> implement RangeSelector support in ICC_SGI0R_EL1 and other
> places (advertised via GICD_TYPER.RSS and ICC_CTLR_EL1.SS) then
> there's no way to send an SGI to a CPU whose Aff0 is outside
> [0..15], and so you shouldn't build a system with Aff0 > 15.
> QEMU's GICv3 doesn't implement the RSS functionality (though it
> wouldn't be hard to add if we really cared), so we should also
> keep Aff0 in [0..15].

Arm/virt uses 8 cores/cluster on GICv2 and 16 cores/cluster on GICv3 as 
this is amount of SGI target-list bits available.

Arm/sbsa-ref goes with 8 cores per cluster by use of 
ARM_DEFAULT_CPUS_PER_CLUSTER.

> We have ARM_DEFAULT_CPUS_PER_CLUSTER = 8, which does keep us
> in that range. I don't think there's really a good reason for
> it to be 8 rather than 16: this might be legacy from GICv2?

GICv2 supported only 8 cores.
Peter Maydell May 2, 2024, 11:40 a.m. UTC | #12
On Thu, 2 May 2024 at 11:56, Marcin Juszkiewicz
<marcin.juszkiewicz@linaro.org> wrote:
>
> W dniu 2.05.2024 o 12:37, Peter Maydell pisze:
> >>   * what are the constraints on the Aff* fields (eg that kernel
> >>     commit suggests Aff0 shouldn't be > 15)?
>
> > This one is apparently related to GICv3 -- if the GIC doesn't
> > implement RangeSelector support in ICC_SGI0R_EL1 and other
> > places (advertised via GICD_TYPER.RSS and ICC_CTLR_EL1.SS) then
> > there's no way to send an SGI to a CPU whose Aff0 is outside
> > [0..15], and so you shouldn't build a system with Aff0 > 15.
> > QEMU's GICv3 doesn't implement the RSS functionality (though it
> > wouldn't be hard to add if we really cared), so we should also
> > keep Aff0 in [0..15].
>
> Arm/virt uses 8 cores/cluster on GICv2 and 16 cores/cluster on GICv3 as
> this is amount of SGI target-list bits available.

You can't have more than 8 cores full stop on GICv2, though,
so you'll never be able to go beyond 8 cores/cluster
even if we didn't impose that limit.

-- PMM
Marcin Juszkiewicz May 2, 2024, 12:14 p.m. UTC | #13
W dniu 19.04.2024 o 20:31, Dorjoy Chowdhury pisze:
> -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
> +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
>   {
> +    if (cpu->has_smt) {
> +        /*
> +         * Right now, the ARM CPUs with SMT supported by QEMU only have
> +         * one thread per core. So Aff0 is always 0.
> +         */
> +        uint32_t Aff2 = idx / clustersz;
> +        uint32_t Aff1 = idx % clustersz;
> +        uint32_t Aff0 = 0;
> +        return (Aff2 << ARM_AFF2_SHIFT) | (Aff1 << ARM_AFF1_SHIFT) | Aff0;
> +    }

Should "return" also have "(1 << 24) |" to have MT=1 set?

Otherwise MPIDR_EL1 = 0x000100 can mean core0 in cluster1 or core1 in 
cluster0.

Value 0x1000100 shows MT=1 so thread0 in core1 in cluster0.
Dorjoy Chowdhury May 2, 2024, 1:04 p.m. UTC | #14
On Thu, May 2, 2024 at 6:14 PM Marcin Juszkiewicz
<marcin.juszkiewicz@linaro.org> wrote:
>
> W dniu 19.04.2024 o 20:31, Dorjoy Chowdhury pisze:
> > -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
> > +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
> >   {
> > +    if (cpu->has_smt) {
> > +        /*
> > +         * Right now, the ARM CPUs with SMT supported by QEMU only have
> > +         * one thread per core. So Aff0 is always 0.
> > +         */
> > +        uint32_t Aff2 = idx / clustersz;
> > +        uint32_t Aff1 = idx % clustersz;
> > +        uint32_t Aff0 = 0;
> > +        return (Aff2 << ARM_AFF2_SHIFT) | (Aff1 << ARM_AFF1_SHIFT) | Aff0;
> > +    }
>
> Should "return" also have "(1 << 24) |" to have MT=1 set?
>
> Otherwise MPIDR_EL1 = 0x000100 can mean core0 in cluster1 or core1 in
> cluster0.
>
> Value 0x1000100 shows MT=1 so thread0 in core1 in cluster0.

I don't know all the details but from what I understand the
"arm_build_mp_afiinity" is used to set the "mp_affinity" member
variable which I assume is about affinity, not the whole MPIDR
register value. That is what I assumed because the Uniprocessor
indication bit(30) is being set only in the "mpidr_read_val" function.
In the patch, the MT bit is also being set in the "mpidr_read_val"
function based on the SMT status (has_smt) of the CPU.

Regards,
Dorjoy
Marcin Juszkiewicz May 2, 2024, 1:11 p.m. UTC | #15
W dniu 2.05.2024 o 15:04, Dorjoy Chowdhury pisze:
>> Should "return" also have "(1 << 24) |" to have MT=1 set?
>>
>> Otherwise MPIDR_EL1 = 0x000100 can mean core0 in cluster1 or core1 in
>> cluster0.
>>
>> Value 0x1000100 shows MT=1 so thread0 in core1 in cluster0.

> I don't know all the details but from what I understand the
> "arm_build_mp_afiinity" is used to set the "mp_affinity" member
> variable which I assume is about affinity, not the whole MPIDR
> register value. That is what I assumed because the Uniprocessor
> indication bit(30) is being set only in the "mpidr_read_val" function.
> In the patch, the MT bit is also being set in the "mpidr_read_val"
> function based on the SMT status (has_smt) of the CPU.

mpidr_read_val() is used only to set VMPIDR and VMPIDR_EL2 registers.

So setting MT bit for MPIDR_EL1 needs to be added somewhere.
Peter Maydell May 2, 2024, 1:13 p.m. UTC | #16
On Thu, 2 May 2024 at 14:11, Marcin Juszkiewicz
<marcin.juszkiewicz@linaro.org> wrote:
>
> W dniu 2.05.2024 o 15:04, Dorjoy Chowdhury pisze:
> >> Should "return" also have "(1 << 24) |" to have MT=1 set?
> >>
> >> Otherwise MPIDR_EL1 = 0x000100 can mean core0 in cluster1 or core1 in
> >> cluster0.
> >>
> >> Value 0x1000100 shows MT=1 so thread0 in core1 in cluster0.
>
> > I don't know all the details but from what I understand the
> > "arm_build_mp_afiinity" is used to set the "mp_affinity" member
> > variable which I assume is about affinity, not the whole MPIDR
> > register value. That is what I assumed because the Uniprocessor
> > indication bit(30) is being set only in the "mpidr_read_val" function.
> > In the patch, the MT bit is also being set in the "mpidr_read_val"
> > function based on the SMT status (has_smt) of the CPU.
>
> mpidr_read_val() is used only to set VMPIDR and VMPIDR_EL2 registers.
>
> So setting MT bit for MPIDR_EL1 needs to be added somewhere.

The readfn for MPIDR_EL1 is mpidr_read(), which calls
mpidr_read_val().

thanks
-- PMM
Marcin Juszkiewicz May 2, 2024, 1:50 p.m. UTC | #17
W dniu 2.05.2024 o 15:13, Peter Maydell pisze:
> On Thu, 2 May 2024 at 14:11, Marcin Juszkiewicz
> <marcin.juszkiewicz@linaro.org> wrote:
>>
>> W dniu 2.05.2024 o 15:04, Dorjoy Chowdhury pisze:
>>>> Should "return" also have "(1 << 24) |" to have MT=1 set?
>>>>
>>>> Otherwise MPIDR_EL1 = 0x000100 can mean core0 in cluster1 or core1 in
>>>> cluster0.
>>>>
>>>> Value 0x1000100 shows MT=1 so thread0 in core1 in cluster0.
>>
>>> I don't know all the details but from what I understand the
>>> "arm_build_mp_afiinity" is used to set the "mp_affinity" member
>>> variable which I assume is about affinity, not the whole MPIDR
>>> register value. That is what I assumed because the Uniprocessor
>>> indication bit(30) is being set only in the "mpidr_read_val" function.
>>> In the patch, the MT bit is also being set in the "mpidr_read_val"
>>> function based on the SMT status (has_smt) of the CPU.
>>
>> mpidr_read_val() is used only to set VMPIDR and VMPIDR_EL2 registers.
>>
>> So setting MT bit for MPIDR_EL1 needs to be added somewhere.
> 
> The readfn for MPIDR_EL1 is mpidr_read(), which calls
> mpidr_read_val().

My mistake.

Both hw/arm/sbsa-ref.c and hw/arm/virt.c build cpu information in 
DeviceTree using "arm_build_mp_afinnity()" function. So if firmware 
parses it then it gets wrong values.

Firmware should probably read MPIDR_EL1 directly instead but what with 
those who read DT and rely on this value already?
Peter Maydell May 2, 2024, 1:57 p.m. UTC | #18
On Thu, 2 May 2024 at 14:50, Marcin Juszkiewicz
<marcin.juszkiewicz@linaro.org> wrote:
> Both hw/arm/sbsa-ref.c and hw/arm/virt.c build cpu information in
> DeviceTree using "arm_build_mp_afinnity()" function. So if firmware
> parses it then it gets wrong values.

What wrong values? The values in the dtb should match the
Aff* fields, they are not the complete MPIDR_EL1 values
including U and MT bits and so on:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/arm/cpus.yaml#n42
AIUI the ACPI spec says the same.

> Firmware should probably read MPIDR_EL1 directly instead but what with
> those who read DT and rely on this value already?

Firmware should probably not read MPIDR_EL1 directly for
topology information, because it's too vague and
unreliable. Either it's real-hardware firmware, in which
case it presumably knows the topology already, or else
it's running on QEMU, in which case this is one of the
things we can feed it via the DTB (either on virt or
or sbsa-ref).

thanks
-- PMM
diff mbox series

Patch

diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index cc68b5d8f1..9d5dcf1a3f 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -487,7 +487,7 @@  static void npcm7xx_realize(DeviceState *dev, Error **errp)
     /* CPUs */
     for (i = 0; i < nc->num_cpus; i++) {
         object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity",
-                                arm_build_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS),
+                                arm_build_mp_affinity(ARM_CPU(&s->cpu[i]), i, NPCM7XX_MAX_NUM_CPUS),
                                 &error_abort);
         object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar",
                                 NPCM7XX_GIC_CPU_IF_ADDR, &error_abort);
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index f5709d6c14..dd42788f23 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -147,10 +147,10 @@  static const int sbsa_ref_irqmap[] = {
     [SBSA_GWDT_WS0] = 16,
 };
 
-static uint64_t sbsa_ref_cpu_mp_affinity(SBSAMachineState *sms, int idx)
+static uint64_t sbsa_ref_cpu_mp_affinity(ARMCPU *cpu, int idx)
 {
     uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER;
-    return arm_build_mp_affinity(idx, clustersz);
+    return arm_build_mp_affinity(cpu, idx, clustersz);
 }
 
 static void sbsa_fdt_add_gic_node(SBSAMachineState *sms)
@@ -254,7 +254,7 @@  static void create_fdt(SBSAMachineState *sms)
         char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
         ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
         CPUState *cs = CPU(armcpu);
-        uint64_t mpidr = sbsa_ref_cpu_mp_affinity(sms, cpu);
+        uint64_t mpidr = sbsa_ref_cpu_mp_affinity(armcpu, cpu);
 
         qemu_fdt_add_subnode(sms->fdt, nodename);
         qemu_fdt_setprop_u64(sms->fdt, nodename, "reg", mpidr);
@@ -816,8 +816,9 @@  static void sbsa_ref_init(MachineState *machine)
 static const CPUArchIdList *sbsa_ref_possible_cpu_arch_ids(MachineState *ms)
 {
     unsigned int max_cpus = ms->smp.max_cpus;
-    SBSAMachineState *sms = SBSA_MACHINE(ms);
     int n;
+    Object *cpuobj;
+    ARMCPU *armcpu;
 
     if (ms->possible_cpus) {
         assert(ms->possible_cpus->len == max_cpus);
@@ -827,13 +828,23 @@  static const CPUArchIdList *sbsa_ref_possible_cpu_arch_ids(MachineState *ms)
     ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
                                   sizeof(CPUArchId) * max_cpus);
     ms->possible_cpus->len = max_cpus;
+
+    /*
+     * Instantiate a temporary CPU object to build mp_affinity
+     * of the possible CPUs.
+     */
+    cpuobj = object_new(ms->cpu_type);
+    armcpu = ARM_CPU(cpuobj);
+
     for (n = 0; n < ms->possible_cpus->len; n++) {
         ms->possible_cpus->cpus[n].type = ms->cpu_type;
         ms->possible_cpus->cpus[n].arch_id =
-            sbsa_ref_cpu_mp_affinity(sms, n);
+            sbsa_ref_cpu_mp_affinity(armcpu, n);
         ms->possible_cpus->cpus[n].props.has_thread_id = true;
         ms->possible_cpus->cpus[n].props.thread_id = n;
     }
+
+    object_unref(cpuobj);
     return ms->possible_cpus;
 }
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a9a913aead..fe6d13c08f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1703,7 +1703,7 @@  void virt_machine_done(Notifier *notifier, void *data)
     virt_build_smbios(vms);
 }
 
-static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
+static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, ARMCPU *cpu, int idx)
 {
     uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER;
     VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
@@ -1723,7 +1723,7 @@  static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
             clustersz = GICV3_TARGETLIST_BITS;
         }
     }
-    return arm_build_mp_affinity(idx, clustersz);
+    return arm_build_mp_affinity(cpu, idx, clustersz);
 }
 
 static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
@@ -2683,6 +2683,8 @@  static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
     unsigned int max_cpus = ms->smp.max_cpus;
     VirtMachineState *vms = VIRT_MACHINE(ms);
     MachineClass *mc = MACHINE_GET_CLASS(vms);
+    Object *cpuobj;
+    ARMCPU *armcpu;
 
     if (ms->possible_cpus) {
         assert(ms->possible_cpus->len == max_cpus);
@@ -2692,10 +2694,18 @@  static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
     ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
                                   sizeof(CPUArchId) * max_cpus);
     ms->possible_cpus->len = max_cpus;
+
+    /*
+     * Instantiate a temporary CPU object to build mp_affinity
+     * of the possible CPUs.
+     */
+    cpuobj = object_new(ms->cpu_type);
+    armcpu = ARM_CPU(cpuobj);
+
     for (n = 0; n < ms->possible_cpus->len; n++) {
         ms->possible_cpus->cpus[n].type = ms->cpu_type;
         ms->possible_cpus->cpus[n].arch_id =
-            virt_cpu_mp_affinity(vms, n);
+            virt_cpu_mp_affinity(vms, armcpu, n);
 
         assert(!mc->smp_props.dies_supported);
         ms->possible_cpus->cpus[n].props.has_socket_id = true;
@@ -2711,6 +2721,8 @@  static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
         ms->possible_cpus->cpus[n].props.thread_id =
             n % ms->smp.threads;
     }
+
+    object_unref(cpuobj);
     return ms->possible_cpus;
 }
 
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ab8d007a86..34ee98f5f9 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1314,8 +1314,18 @@  static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
     }
 }
 
-uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
+uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz)
 {
+    if (cpu->has_smt) {
+        /*
+         * Right now, the ARM CPUs with SMT supported by QEMU only have
+         * one thread per core. So Aff0 is always 0.
+         */
+        uint32_t Aff2 = idx / clustersz;
+        uint32_t Aff1 = idx % clustersz;
+        uint32_t Aff0 = 0;
+        return (Aff2 << ARM_AFF2_SHIFT) | (Aff1 << ARM_AFF1_SHIFT) | Aff0;
+    }
     uint32_t Aff1 = idx / clustersz;
     uint32_t Aff0 = idx % clustersz;
     return (Aff1 << ARM_AFF1_SHIFT) | Aff0;
@@ -2136,7 +2146,7 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
      * so these bits always RAZ.
      */
     if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) {
-        cpu->mp_affinity = arm_build_mp_affinity(cs->cpu_index,
+        cpu->mp_affinity = arm_build_mp_affinity(cpu, cs->cpu_index,
                                                  ARM_DEFAULT_CPUS_PER_CLUSTER);
     }
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index bc0c84873f..57343c7e24 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -948,6 +948,9 @@  struct ArchCPU {
     /* Uniprocessor system with MP extensions */
     bool mp_is_up;
 
+    /* Arm cores with SMT support */
+    bool has_smt;
+
     /* True if we tried kvm_arm_host_cpu_features() during CPU instance_init
      * and the probe failed (so we need to report the error in realize)
      */
@@ -1140,7 +1143,7 @@  void arm_cpu_post_init(Object *obj);
     (ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK | ARM_AFF3_MASK)
 #define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK)
 
-uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz);
+uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz);
 
 #ifndef CONFIG_USER_ONLY
 extern const VMStateDescription vmstate_arm_cpu;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index a620481d7c..3e09bc950b 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4676,6 +4676,10 @@  static uint64_t mpidr_read_val(CPUARMState *env)
             mpidr |= (1u << 30);
         }
     }
+
+    if (cpu->has_smt) {
+        mpidr |= (1U << 24);
+    }
     return mpidr;
 }
 
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 9f7a9f3d2c..8807809842 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -289,6 +289,8 @@  static void aarch64_a55_initfn(Object *obj)
 
     /* From D5.4 AArch64 PMU register summary */
     cpu->isar.reset_pmcr_el0 = 0x410b3000;
+
+    cpu->has_smt = true;
 }
 
 static void aarch64_a72_initfn(Object *obj)
@@ -413,6 +415,8 @@  static void aarch64_a76_initfn(Object *obj)
 
     /* From D5.1 AArch64 PMU register summary */
     cpu->isar.reset_pmcr_el0 = 0x410b3000;
+
+    cpu->has_smt = true;
 }
 
 static void aarch64_a64fx_initfn(Object *obj)
@@ -652,6 +656,8 @@  static void aarch64_neoverse_n1_initfn(Object *obj)
     /* From D5.1 AArch64 PMU register summary */
     cpu->isar.reset_pmcr_el0 = 0x410c3000;
 
+    cpu->has_smt = true;
+
     define_neoverse_n1_cp_reginfo(cpu);
 }
 
@@ -740,6 +746,8 @@  static void aarch64_neoverse_v1_initfn(Object *obj)
     /* From 5.5.1 AArch64 PMU register summary */
     cpu->isar.reset_pmcr_el0 = 0x41213000;
 
+    cpu->has_smt = true;
+
     define_neoverse_v1_cp_reginfo(cpu);
 
     aarch64_add_pauth_properties(obj);
@@ -958,6 +966,8 @@  static void aarch64_a710_initfn(Object *obj)
     /* FIXME: Not documented -- copied from neoverse-v1 */
     cpu->reset_sctlr = 0x30c50838;
 
+    cpu->has_smt = true;
+
     define_arm_cp_regs(cpu, cortex_a710_cp_reginfo);
 
     aarch64_add_pauth_properties(obj);
@@ -1055,6 +1065,8 @@  static void aarch64_neoverse_n2_initfn(Object *obj)
     /* FIXME: Not documented -- copied from neoverse-v1 */
     cpu->reset_sctlr = 0x30c50838;
 
+    cpu->has_smt = true;
+
     /*
      * The Neoverse N2 has all of the Cortex-A710 IMPDEF registers,
      * and a few more RNG related ones.