diff mbox series

[v2] riscv: cancel the limitation that NR_CPUS is less than or equal to 32

Message ID 20211221233253.123268-1-wxjstz@126.com
State Changes Requested
Delegated to: Andes
Headers show
Series [v2] riscv: cancel the limitation that NR_CPUS is less than or equal to 32 | expand

Commit Message

Xiang W Dec. 21, 2021, 11:32 p.m. UTC
Various RISC-V specifications allow the number of harts to be
greater than 32. The current limit of 32 is imposed by
gd->arch.available_harts, which is a single ulong. We can remove
this limitation by turning it into a bitmap. The number of harts is
now limited to 4095, which is the limit defined by the RISC-V
Advanced Core Local Interruptor (ACLINT) Specification.

Tested on SiFive Unmatched.

Signed-off-by: Xiang W <wxjstz@126.com>
---
Changes since v1:

* When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS
  overflows the immediate offset range of ld/lw. This version fixes
  the problem by computing the address in a register instead.

 arch/riscv/Kconfig                   |  4 ++--
 arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
 arch/riscv/include/asm/global_data.h |  4 +++-
 arch/riscv/lib/smp.c                 |  2 +-
 4 files changed, 22 insertions(+), 9 deletions(-)

Comments

Leo Liang Dec. 29, 2021, 9:23 a.m. UTC | #1
Hi Xiang,
On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> Various specifications of riscv allow the number of hart to be
> greater than 32. The limit of 32 is determined by
> gd->arch.available_harts. We can eliminate this limitation through
> bitmaps. Currently, the number of hart is limited to 4095, and 4095
> is the limit of the RISC-V Advanced Core Local Interruptor
> Specification.
> 
> Test on sifive unmatched.
> 
> Signed-off-by: Xiang W <wxjstz@126.com>
> ---
> Changes since v1:
> 
> * When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS will
>   overflow the immediate range of ld/lw. This patch fixes this
>   problem
> 
>  arch/riscv/Kconfig                   |  4 ++--
>  arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
>  arch/riscv/include/asm/global_data.h |  4 +++-
>  arch/riscv/lib/smp.c                 |  2 +-
>  4 files changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
> index 76850ec9be..92f3b78f29 100644
> --- a/arch/riscv/cpu/start.S
> +++ b/arch/riscv/cpu/start.S
> @@ -166,11 +166,22 @@ wait_for_gd_init:
>  	mv	gp, s0
>  
>  	/* register available harts in the available_harts mask */
> -	li	t1, 1
> -	sll	t1, t1, tp
> -	LREG	t2, GD_AVAILABLE_HARTS(gp)
> -	or	t2, t2, t1
> -	SREG	t2, GD_AVAILABLE_HARTS(gp)
> +	li	t1, GD_AVAILABLE_HARTS
> +	add	t1, t1, gp
> +	LREG	t1, 0(t1)
> +#if defined(CONFIG_ARCH_RV64I)
> +	srli	t2, tp, 6
> +	slli	t2, t2, 3
> +#elif defined(CONFIG_ARCH_RV32I)
> +	srli	t2, tp, 5
> +	slli	t2, t2, 2
> +#endif
> +	add	t1, t1, t2
> +	LREG	t2, 0(t1)
> +	li	t3, 1
> +	sll	t3, t3, tp
This seems incorrect.
Shouldn't we have "$tp % sizeof(ulong)" instead of "$tp / sizeof(ulong)" ?
> +	or	t2, t2, t3
> +	SREG	t2, 0(t1)
>  
>  	amoswap.w.rl zero, zero, 0(t0)
Best regards,
Leo
Xiang W Dec. 29, 2021, 5:55 p.m. UTC | #2
在 2021-12-29星期三的 17:23 +0800,Leo Liang写道:
> Hi Xiang,
> On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> > Various specifications of riscv allow the number of hart to be
> > greater than 32. The limit of 32 is determined by
> > gd->arch.available_harts. We can eliminate this limitation through
> > bitmaps. Currently, the number of hart is limited to 4095, and 4095
> > is the limit of the RISC-V Advanced Core Local Interruptor
> > Specification.
> > 
> > Test on sifive unmatched.
> > 
> > Signed-off-by: Xiang W <wxjstz@126.com>
> > ---
> > Changes since v1:
> > 
> > * When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS will
> >   overflow the immediate range of ld/lw. This patch fixes this
> >   problem
> > 
> >  arch/riscv/Kconfig                   |  4 ++--
> >  arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
> >  arch/riscv/include/asm/global_data.h |  4 +++-
> >  arch/riscv/lib/smp.c                 |  2 +-
> >  4 files changed, 22 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
> > index 76850ec9be..92f3b78f29 100644
> > --- a/arch/riscv/cpu/start.S
> > +++ b/arch/riscv/cpu/start.S
> > @@ -166,11 +166,22 @@ wait_for_gd_init:
> >         mv      gp, s0
> >  
> >         /* register available harts in the available_harts mask */
> > -       li      t1, 1
> > -       sll     t1, t1, tp
> > -       LREG    t2, GD_AVAILABLE_HARTS(gp)
> > -       or      t2, t2, t1
> > -       SREG    t2, GD_AVAILABLE_HARTS(gp)
> > +       li      t1, GD_AVAILABLE_HARTS
> > +       add     t1, t1, gp
> > +       LREG    t1, 0(t1)
> > +#if defined(CONFIG_ARCH_RV64I)
> > +       srli    t2, tp, 6
> > +       slli    t2, t2, 3
> > +#elif defined(CONFIG_ARCH_RV32I)
> > +       srli    t2, tp, 5
> > +       slli    t2, t2, 2
> > +#endif
> > +       add     t1, t1, t2
> > +       LREG    t2, 0(t1)
> > +       li      t3, 1
> > +       sll     t3, t3, tp
> This seems incorrect.
> Shouldn't we have "$tp % sizeof(ulong)" instead of "$tp /
> sizeof(ulong)" ?

Do you mean "$tp % sizeof(ulong)" instead of "$tp"?

The RISC-V specification describes the shift instructions as follows.

For RV32I:

SLL, SRL, and SRA perform logical left, logical right, and arithmetic
right shifts on the value in register rs1 by the shift amount held in
the lower 5 bits of register rs2.

For RV64I:

SLL, SRL, and SRA perform logical left, logical right, and arithmetic
right shifts on the value in register rs1 by the shift amount held in
register rs2. In RV64I, only the low 6 bits of rs2 are considered for
the shift amount.

So we don’t need to perform the remainder operation: the hardware
already ignores the upper bits of tp when shifting.

regards,
Xiang W
> > +       or      t2, t2, t3
> > +       SREG    t2, 0(t1)
> >  
> >         amoswap.w.rl zero, zero, 0(t0)
> Best regards,
> Leo
Leo Liang Jan. 3, 2022, 11:11 a.m. UTC | #3
On Thu, Dec 30, 2021 at 01:55:15AM +0800, Xiang W wrote:
> 在 2021-12-29星期三的 17:23 +0800,Leo Liang写道:
> > Hi Xiang,
> > On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> > > Various specifications of riscv allow the number of hart to be
> > > greater than 32. The limit of 32 is determined by
> > > gd->arch.available_harts. We can eliminate this limitation through
> > > bitmaps. Currently, the number of hart is limited to 4095, and 4095
> > > is the limit of the RISC-V Advanced Core Local Interruptor
> > > Specification.
> > > 
> > > Test on sifive unmatched.
> > > 
> > > Signed-off-by: Xiang W <wxjstz@126.com>
> > > ---
> > > Changes since v1:
> > > 
> > > * When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS will
> > >   overflow the immediate range of ld/lw. This patch fixes this
> > >   problem
> > > 
> > >  arch/riscv/Kconfig                   |  4 ++--
> > >  arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
> > >  arch/riscv/include/asm/global_data.h |  4 +++-
> > >  arch/riscv/lib/smp.c                 |  2 +-
> > >  4 files changed, 22 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
> > > index 76850ec9be..92f3b78f29 100644
> > > --- a/arch/riscv/cpu/start.S
> > > +++ b/arch/riscv/cpu/start.S
> > > @@ -166,11 +166,22 @@ wait_for_gd_init:
> > >         mv      gp, s0
> > >  
> > >         /* register available harts in the available_harts mask */
> > > -       li      t1, 1
> > > -       sll     t1, t1, tp
> > > -       LREG    t2, GD_AVAILABLE_HARTS(gp)
> > > -       or      t2, t2, t1
> > > -       SREG    t2, GD_AVAILABLE_HARTS(gp)
> > > +       li      t1, GD_AVAILABLE_HARTS
> > > +       add     t1, t1, gp
> > > +       LREG    t1, 0(t1)
> > > +#if defined(CONFIG_ARCH_RV64I)
> > > +       srli    t2, tp, 6
> > > +       slli    t2, t2, 3
> > > +#elif defined(CONFIG_ARCH_RV32I)
> > > +       srli    t2, tp, 5
> > > +       slli    t2, t2, 2
> > > +#endif
> > > +       add     t1, t1, t2
> > > +       LREG    t2, 0(t1)
> > > +       li      t3, 1
> > > +       sll     t3, t3, tp
> > This seems incorrect.
> > Shouldn't we have "$tp % sizeof(ulong)" instead of "$tp /
> > sizeof(ulong)" ?
> 
> Do you meening: "$tp % sizeof(ulong)" instead of "$tp" ?
> 
> There is such a description in the riscv specification:
> 
> SLL, SRL, and SRA perform logical left, logical right, and arithmetic
> right shifts on the value in register rs1 by the shift amount held in
> the lower 5 bits of register rs2.
> 
> SLL, SRL, and SRA perform logical left, logical right, and arithmetic
> right shifts on the value in register rs1 by the shift amount held in
> register rs2. In RV64I, only the low 6 bits of rs2 are considered for
> the shift amount.
> 
> So we don’t need to perform the remainder operation.

Got it! Thanks for the explanation.

LGTM,
Reviewed-by: Leo Yu-Chi Liang <ycliang@andestech.com>

Best regards,
Leo
> 
> regards,
> Xiang W
> > > +       or      t2, t2, t3
> > > +       SREG    t2, 0(t1)
> > >  
> > >         amoswap.w.rl zero, zero, 0(t0)
> > Best regards,
> > Leo
> 
>
David Abdurachmanov Feb. 3, 2023, 2:24 p.m. UTC | #4
On Mon, Jan 3, 2022 at 1:13 PM Leo Liang <ycliang@andestech.com> wrote:
>
> On Thu, Dec 30, 2021 at 01:55:15AM +0800, Xiang W wrote:
> > 在 2021-12-29星期三的 17:23 +0800,Leo Liang写道:
> > > Hi Xiang,
> > > On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> > > > Various specifications of riscv allow the number of hart to be
> > > > greater than 32. The limit of 32 is determined by
> > > > gd->arch.available_harts. We can eliminate this limitation through
> > > > bitmaps. Currently, the number of hart is limited to 4095, and 4095
> > > > is the limit of the RISC-V Advanced Core Local Interruptor
> > > > Specification.
> > > >
> > > > Test on sifive unmatched.
> > > >
> > > > Signed-off-by: Xiang W <wxjstz@126.com>
> > > > ---
> > > > Changes since v1:
> > > >
> > > > * When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS will
> > > >   overflow the immediate range of ld/lw. This patch fixes this
> > > >   problem
> > > >
> > > >  arch/riscv/Kconfig                   |  4 ++--
> > > >  arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
> > > >  arch/riscv/include/asm/global_data.h |  4 +++-
> > > >  arch/riscv/lib/smp.c                 |  2 +-
> > > >  4 files changed, 22 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
> > > > index 76850ec9be..92f3b78f29 100644
> > > > --- a/arch/riscv/cpu/start.S
> > > > +++ b/arch/riscv/cpu/start.S
> > > > @@ -166,11 +166,22 @@ wait_for_gd_init:
> > > >         mv      gp, s0
> > > >
> > > >         /* register available harts in the available_harts mask */
> > > > -       li      t1, 1
> > > > -       sll     t1, t1, tp
> > > > -       LREG    t2, GD_AVAILABLE_HARTS(gp)
> > > > -       or      t2, t2, t1
> > > > -       SREG    t2, GD_AVAILABLE_HARTS(gp)
> > > > +       li      t1, GD_AVAILABLE_HARTS
> > > > +       add     t1, t1, gp
> > > > +       LREG    t1, 0(t1)
> > > > +#if defined(CONFIG_ARCH_RV64I)
> > > > +       srli    t2, tp, 6
> > > > +       slli    t2, t2, 3
> > > > +#elif defined(CONFIG_ARCH_RV32I)
> > > > +       srli    t2, tp, 5
> > > > +       slli    t2, t2, 2
> > > > +#endif
> > > > +       add     t1, t1, t2
> > > > +       LREG    t2, 0(t1)
> > > > +       li      t3, 1
> > > > +       sll     t3, t3, tp
> > > This seems incorrect.
> > > Shouldn't we have "$tp % sizeof(ulong)" instead of "$tp /
> > > sizeof(ulong)" ?
> >
> > Do you meening: "$tp % sizeof(ulong)" instead of "$tp" ?
> >
> > There is such a description in the riscv specification:
> >
> > SLL, SRL, and SRA perform logical left, logical right, and arithmetic
> > right shifts on the value in register rs1 by the shift amount held in
> > the lower 5 bits of register rs2.
> >
> > SLL, SRL, and SRA perform logical left, logical right, and arithmetic
> > right shifts on the value in register rs1 by the shift amount held in
> > register rs2. In RV64I, only the low 6 bits of rs2 are considered for
> > the shift amount.
> >
> > So we don’t need to perform the remainder operation.
>
> Got it! Thanks for the explanation.
>
> LGTM,
> Reviewed-by: Leo Yu-Chi Liang <ycliang@andestech.com>

I noticed that this has never landed in U-Boot. Was this forgotten or
dropped for some reason (couldn't find anything)?

The current limit on the Linux kernel side is 512. The default on
64-bit (riscv64) is 64.

david


>
> Best regards,
> Leo
> >
> > regards,
> > Xiang W
> > > > +       or      t2, t2, t3
> > > > +       SREG    t2, 0(t1)
> > > >
> > > >         amoswap.w.rl zero, zero, 0(t0)
> > > Best regards,
> > > Leo
> >
> >
Leo Liang Feb. 6, 2023, 8:07 a.m. UTC | #5
Hi David, 
On Fri, Feb 03, 2023 at 03:24:37PM +0100, David Abdurachmanov wrote:
> On Mon, Jan 3, 2022 at 1:13 PM Leo Liang <ycliang@andestech.com> wrote:
> >
> > On Thu, Dec 30, 2021 at 01:55:15AM +0800, Xiang W wrote:
> > > 在 2021-12-29星期三的 17:23 +0800,Leo Liang写道:
> > > > Hi Xiang,
> > > > On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> > > > > Various specifications of riscv allow the number of hart to be
> > > > > greater than 32. The limit of 32 is determined by
> > > > > gd->arch.available_harts. We can eliminate this limitation through
> > > > > bitmaps. Currently, the number of hart is limited to 4095, and 4095
> > > > > is the limit of the RISC-V Advanced Core Local Interruptor
> > > > > Specification.
> > > > >
> > > > > Test on sifive unmatched.
> > > > >
> > > > > Signed-off-by: Xiang W <wxjstz@126.com>
> 
> I noticed that this has never landed in U-Boot. Was this forgotten or
> dropped for some reason (couldn't find anything)?
> 

Sorry, this patch was forgotten.
I will make sure it gets applied as soon as possible
if there are no other errors or concerns.

Thanks for the reminder!

Best regards,
Leo

> The current limit on the Linux kernel side is 512. The default on
> 64-bit (riscv64) is 64.
> 
> david
Leo Liang Feb. 10, 2023, 7:25 a.m. UTC | #6
Hi Xiang,

On Fri, Feb 03, 2023 at 03:24:37PM +0100, David Abdurachmanov wrote:
> On Mon, Jan 3, 2022 at 1:13 PM Leo Liang <ycliang@andestech.com> wrote:
> >
> > On Thu, Dec 30, 2021 at 01:55:15AM +0800, Xiang W wrote:
> > > 在 2021-12-29星期三的 17:23 +0800,Leo Liang写道:
> > > > Hi Xiang,
> > > > On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> > > > > Various specifications of riscv allow the number of hart to be
> > > > > greater than 32. The limit of 32 is determined by
> > > > > gd->arch.available_harts. We can eliminate this limitation through
> > > > > bitmaps. Currently, the number of hart is limited to 4095, and 4095
> > > > > is the limit of the RISC-V Advanced Core Local Interruptor
> > > > > Specification.
> > > > >
> > > > > Test on sifive unmatched.
> > > > >
> > > > > Signed-off-by: Xiang W <wxjstz@126.com>
> > > > > ---
> > > > > Changes since v1:
> > > > >
> > > > > * When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS will
> > > > >   overflow the immediate range of ld/lw. This patch fixes this
> > > > >   problem
> > > > >
> > > > >  arch/riscv/Kconfig                   |  4 ++--
> > > > >  arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
> > > > >  arch/riscv/include/asm/global_data.h |  4 +++-
> > > > >  arch/riscv/lib/smp.c                 |  2 +-
> > > > >  4 files changed, 22 insertions(+), 9 deletions(-)
> > > > >
> 
> I noticed that this has never landed in U-Boot. Was this forgotten or
> dropped for some reason (couldn't find anything)?
> 
> The current limit on the Linux kernel side is 512. The default on
> 64-bit (riscv64) is 64.
> 
> david

The patch seems to cause some CI error (timeout on QEMU).
(https://source.denx.de/u-boot/custodians/u-boot-riscv/-/pipelines/15076)
Could you take a look at it if you have time?

Best regards,
Leo
Xiang W Feb. 11, 2023, 2:11 p.m. UTC | #7
在 2023-02-10星期五的 07:25 +0000,Leo Liang写道:
> Hi Xiang,
> 
> On Fri, Feb 03, 2023 at 03:24:37PM +0100, David Abdurachmanov wrote:
> > On Mon, Jan 3, 2022 at 1:13 PM Leo Liang <ycliang@andestech.com> wrote:
> > > 
> > > On Thu, Dec 30, 2021 at 01:55:15AM +0800, Xiang W wrote:
> > > > 在 2021-12-29星期三的 17:23 +0800,Leo Liang写道:
> > > > > Hi Xiang,
> > > > > On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> > > > > > Various specifications of riscv allow the number of hart to be
> > > > > > greater than 32. The limit of 32 is determined by
> > > > > > gd->arch.available_harts. We can eliminate this limitation through
> > > > > > bitmaps. Currently, the number of hart is limited to 4095, and 4095
> > > > > > is the limit of the RISC-V Advanced Core Local Interruptor
> > > > > > Specification.
> > > > > > 
> > > > > > Test on sifive unmatched.
> > > > > > 
> > > > > > Signed-off-by: Xiang W <wxjstz@126.com>
> > > > > > ---
> > > > > > Changes since v1:
> > > > > > 
> > > > > > * When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS will
> > > > > >   overflow the immediate range of ld/lw. This patch fixes this
> > > > > >   problem
> > > > > > 
> > > > > >  arch/riscv/Kconfig                   |  4 ++--
> > > > > >  arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
> > > > > >  arch/riscv/include/asm/global_data.h |  4 +++-
> > > > > >  arch/riscv/lib/smp.c                 |  2 +-
> > > > > >  4 files changed, 22 insertions(+), 9 deletions(-)
> > > > > > 
> > 
> > I noticed that this has never landed in U-Boot. Was this forgotten or
> > dropped for some reason (couldn't find anything)?
> > 
> > The current limit on the Linux kernel side is 512. The default on
> > 64-bit (riscv64) is 64.
> > 
> > david
> 
> The patch seems to cause some CI error (timeout on QEMU).
> (https://source.denx.de/u-boot/custodians/u-boot-riscv/-/pipelines/15076)
> Could you take a look at it if you have time?
> 
> Best regards,
> Leo

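Sorry! I missed a bug. There is an error in calculating the starting address
of available_harts: the extra "LREG t1, 0(t1)" loaded the contents of the
field instead of keeping its address. The patch for start.S needs to be
updated as follows.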

diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
index 76850ec9be..92f3b78f29 100644
--- a/arch/riscv/cpu/start.S
+++ b/arch/riscv/cpu/start.S
@@ -166,11 +166,22 @@ wait_for_gd_init:
 	mv	gp, s0
 
 	/* register available harts in the available_harts mask */
-	li	t1, 1
-	sll	t1, t1, tp
-	LREG	t2, GD_AVAILABLE_HARTS(gp)
-	or	t2, t2, t1
-	SREG	t2, GD_AVAILABLE_HARTS(gp)
+	li	t1, GD_AVAILABLE_HARTS
+	add	t1, t1, gp
+#if defined(CONFIG_ARCH_RV64I)
+	srli	t2, tp, 6
+	slli	t2, t2, 3
+#elif defined(CONFIG_ARCH_RV32I)
+	srli	t2, tp, 5
+	slli	t2, t2, 2
+#endif
+	add	t1, t1, t2
+	LREG	t2, 0(t1)
+	li	t3, 1
+	sll	t3, t3, tp
+	or	t2, t2, t3
+	SREG	t2, 0(t1)
 
 	amoswap.w.rl zero, zero, 0(t0)

The mailing list cannot receive my mail; please help to update the patch.
Leo Liang Feb. 13, 2023, 8:46 a.m. UTC | #8
Hi Xiang,

On Sat, Feb 11, 2023 at 10:11:31PM +0800, Xiang W wrote:
> 在 2023-02-10星期五的 07:25 +0000,Leo Liang写道:
> > Hi Xiang,
> > 
> > On Fri, Feb 03, 2023 at 03:24:37PM +0100, David Abdurachmanov wrote:
> > > On Mon, Jan 3, 2022 at 1:13 PM Leo Liang <ycliang@andestech.com> wrote:
> > > > 
> > > > On Thu, Dec 30, 2021 at 01:55:15AM +0800, Xiang W wrote:
> > > > > 在 2021-12-29星期三的 17:23 +0800,Leo Liang写道:
> > > > > > Hi Xiang,
> > > > > > On Wed, Dec 22, 2021 at 07:32:53AM +0800, Xiang W wrote:
> > > > > > > Various specifications of riscv allow the number of hart to be
> > > > > > > greater than 32. The limit of 32 is determined by
> > > > > > > gd->arch.available_harts. We can eliminate this limitation through
> > > > > > > bitmaps. Currently, the number of hart is limited to 4095, and 4095
> > > > > > > is the limit of the RISC-V Advanced Core Local Interruptor
> > > > > > > Specification.
> > > > > > > 
> > > > > > > Test on sifive unmatched.
> > > > > > > 
> > > > > > > Signed-off-by: Xiang W <wxjstz@126.com>
> > > > > > > ---
> > > > > > > Changes since v1:
> > > > > > > 
> > > > > > > * When NR_CPUS is very large, the value of GD_AVAILABLE_HARTS will
> > > > > > >   overflow the immediate range of ld/lw. This patch fixes this
> > > > > > >   problem
> > > > > > > 
> > > > > > >  arch/riscv/Kconfig                   |  4 ++--
> > > > > > >  arch/riscv/cpu/start.S               | 21 ++++++++++++++++-----
> > > > > > >  arch/riscv/include/asm/global_data.h |  4 +++-
> > > > > > >  arch/riscv/lib/smp.c                 |  2 +-
> > > > > > >  4 files changed, 22 insertions(+), 9 deletions(-)
> > > > > > > 
> > > 
> > > I noticed that this has never landed in U-Boot. Was this forgotten or
> > > dropped for some reason (couldn't find anything)?
> > > 
> > > The current limit on the Linux kernel side is 512. The default on
> > > 64-bit (riscv64) is 64.
> > > 
> > > david
> > 
> > The patch seems to cause some CI error (timeout on QEMU).
> > (https://source.denx.de/u-boot/custodians/u-boot-riscv/-/pipelines/15076)
> > Could you take a look at it if you have time?
> > 
> > Best regards,
> > Leo
> 
> sorry! I missing a bug. There is an error in calculating the starting address
> of available_harts. The patch for start.S needs to be updated.
> 
> diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
> index 76850ec9be..92f3b78f29 100644
> --- a/arch/riscv/cpu/start.S
> +++ b/arch/riscv/cpu/start.S
> @@ -166,11 +166,22 @@ wait_for_gd_init:
>  	mv	gp, s0
>  
>  	/* register available harts in the available_harts mask */
> -	li	t1, 1
> -	sll	t1, t1, tp
> -	LREG	t2, GD_AVAILABLE_HARTS(gp)
> -	or	t2, t2, t1
> -	SREG	t2, GD_AVAILABLE_HARTS(gp)
> +	li	t1, GD_AVAILABLE_HARTS
> +	add	t1, t1, gp
> +#if defined(CONFIG_ARCH_RV64I)
> +	srli	t2, tp, 6
> +	slli	t2, t2, 3
> +#elif defined(CONFIG_ARCH_RV32I)
> +	srli	t2, tp, 5
> +	slli	t2, t2, 2
> +#endif
> +	add	t1, t1, t2
> +	LREG	t2, 0(t1)
> +	li	t3, 1
> +	sll	t3, t3, tp
> +	or	t2, t2, t3
> +	SREG	t2, 0(t1)
>  
>  	amoswap.w.rl zero, zero, 0(t0)
> 
> The mailing list cannot receive my mail, please help to update
> 

I have updated the patch.
(https://patchwork.ozlabs.org/project/uboot/patch/20230213084313.10419-1-ycliang@andestech.com/)
Could you take a look to see if there is any issue?

Best regards,
Leo
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index ba29e70acf..7b9c7f5bca 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -220,8 +220,8 @@  config SPL_SMP
 	  all, single processor machines.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-32)"
-	range 2 32
+	int "Maximum number of CPUs (2-4095)"
+	range 2 4095
 	depends on SMP || SPL_SMP
 	default 8
 	help
diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
index 76850ec9be..92f3b78f29 100644
--- a/arch/riscv/cpu/start.S
+++ b/arch/riscv/cpu/start.S
@@ -166,11 +166,22 @@  wait_for_gd_init:
 	mv	gp, s0
 
 	/* register available harts in the available_harts mask */
-	li	t1, 1
-	sll	t1, t1, tp
-	LREG	t2, GD_AVAILABLE_HARTS(gp)
-	or	t2, t2, t1
-	SREG	t2, GD_AVAILABLE_HARTS(gp)
+	li	t1, GD_AVAILABLE_HARTS
+	add	t1, t1, gp
+	LREG	t1, 0(t1)
+#if defined(CONFIG_ARCH_RV64I)
+	srli	t2, tp, 6
+	slli	t2, t2, 3
+#elif defined(CONFIG_ARCH_RV32I)
+	srli	t2, tp, 5
+	slli	t2, t2, 2
+#endif
+	add	t1, t1, t2
+	LREG	t2, 0(t1)
+	li	t3, 1
+	sll	t3, t3, tp
+	or	t2, t2, t3
+	SREG	t2, 0(t1)
 
 	amoswap.w.rl zero, zero, 0(t0)
 
diff --git a/arch/riscv/include/asm/global_data.h b/arch/riscv/include/asm/global_data.h
index 095484a635..6de2ee0b25 100644
--- a/arch/riscv/include/asm/global_data.h
+++ b/arch/riscv/include/asm/global_data.h
@@ -10,9 +10,11 @@ 
 #ifndef	__ASM_GBL_DATA_H
 #define __ASM_GBL_DATA_H
 
+#include <config.h>
 #include <asm/smp.h>
 #include <asm/u-boot.h>
 #include <compiler.h>
+#include <linux/bitops.h>
 
 /* Architecture-specific global data */
 struct arch_global_data {
@@ -28,7 +30,7 @@  struct arch_global_data {
 	struct ipi_data ipi[CONFIG_NR_CPUS];
 #endif
 #ifndef CONFIG_XIP
-	ulong available_harts;
+	ulong available_harts[BITS_TO_LONGS(CONFIG_NR_CPUS)];
 #endif
 };
 
diff --git a/arch/riscv/lib/smp.c b/arch/riscv/lib/smp.c
index ba992100ad..e8e391fd41 100644
--- a/arch/riscv/lib/smp.c
+++ b/arch/riscv/lib/smp.c
@@ -47,7 +47,7 @@  static int send_ipi_many(struct ipi_data *ipi, int wait)
 
 #ifndef CONFIG_XIP
 		/* skip if hart is not available */
-		if (!(gd->arch.available_harts & (1 << reg)))
+		if (!test_bit(reg, gd->arch.available_harts))
 			continue;
 #endif