diff mbox series

[PATCHv8,3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus

Message ID 20231009113036.45988-4-piliu@redhat.com (mailing list archive)
State Superseded
Headers show
Series enable nr_cpus for powerpc | expand

Commit Message

Pingfan Liu Oct. 9, 2023, 11:30 a.m. UTC
If the boot_cpuid is greater than or equal to nr_cpus, extra effort is
required to ensure that the boot_cpu is in cpu_present_mask. This can be
achieved by reserving the last quota for the boot cpu.

Note: the restriction on nr_cpus will be lifted with more effort in the
successive patches

Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@linux.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

Comments

Hari Bathini Oct. 10, 2023, 8:26 a.m. UTC | #1
On 09/10/23 5:00 pm, Pingfan Liu wrote:
> If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
> ensure the boot_cpu is in cpu_present_mask. This can be achieved by
> reserving the last quota for the boot cpu.
> 
> Note: the restriction on nr_cpus will be lifted with more effort in the
> successive patches
> 
> Signed-off-by: Pingfan Liu <piliu@redhat.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Nicholas Piggin <npiggin@gmail.com>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Cc: Wen Xiong <wenxiong@linux.ibm.com>
> Cc: Baoquan He <bhe@redhat.com>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: kexec@lists.infradead.org
> To: linuxppc-dev@lists.ozlabs.org
> ---
>   arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
>   1 file changed, 22 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 81291e13dec0..f9ef0a2666b0 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -454,8 +454,8 @@ struct interrupt_server_node {
>   void __init smp_setup_cpu_maps(void)
>   {
>   	struct device_node *dn;
> -	int shift = 0, cpu = 0;
> -	int j, nthreads = 1;
> +	int terminate, shift = 0, cpu = 0;
> +	int j, bt_thread = 0, nthreads = 1;
>   	int len;
>   	struct interrupt_server_node *intserv_node, *n;
>   	struct list_head *bt_node, head;
> @@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
>   			for (j = 0 ; j < nthreads; j++) {
>   				if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
>   					bt_node = &intserv_node->node;
> +					bt_thread = j;
>   					found_boot_cpu = true;
>   					/*
>   					 * Record the round-shift between dt
> @@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
>   	/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
>   	list_add_tail(&head, bt_node);
>   	pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
> +	terminate = nr_cpu_ids;
>   	list_for_each_entry(intserv_node, &head, node) {
>   
> +		j = 0;

> +		/* Choose a start point to cover the boot cpu */
> +		if (nr_cpu_ids - 1 < bt_thread) {
> +			/*
> +			 * The processor core puts assumption on the thread id,
> +			 * not to breach the assumption.
> +			 */
> +			terminate = nr_cpu_ids - 1;

nthreads is anyway assumed to be same for all cores. So, enforcing
nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
make the code much simpler without the need for above check and the
other complexities addressed in the subsequent patches...

Thanks
Hari
Pingfan Liu Oct. 11, 2023, 3:05 a.m. UTC | #2
On Tue, Oct 10, 2023 at 01:56:13PM +0530, Hari Bathini wrote:
> 
> 
> On 09/10/23 5:00 pm, Pingfan Liu wrote:
> > If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
> > ensure the boot_cpu is in cpu_present_mask. This can be achieved by
> > reserving the last quota for the boot cpu.
> > 
> > Note: the restriction on nr_cpus will be lifted with more effort in the
> > successive patches
> > 
> > Signed-off-by: Pingfan Liu <piliu@redhat.com>
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Nicholas Piggin <npiggin@gmail.com>
> > Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> > Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> > Cc: Wen Xiong <wenxiong@linux.ibm.com>
> > Cc: Baoquan He <bhe@redhat.com>
> > Cc: Ming Lei <ming.lei@redhat.com>
> > Cc: kexec@lists.infradead.org
> > To: linuxppc-dev@lists.ozlabs.org
> > ---
> >   arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
> >   1 file changed, 22 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> > index 81291e13dec0..f9ef0a2666b0 100644
> > --- a/arch/powerpc/kernel/setup-common.c
> > +++ b/arch/powerpc/kernel/setup-common.c
> > @@ -454,8 +454,8 @@ struct interrupt_server_node {
> >   void __init smp_setup_cpu_maps(void)
> >   {
> >   	struct device_node *dn;
> > -	int shift = 0, cpu = 0;
> > -	int j, nthreads = 1;
> > +	int terminate, shift = 0, cpu = 0;
> > +	int j, bt_thread = 0, nthreads = 1;
> >   	int len;
> >   	struct interrupt_server_node *intserv_node, *n;
> >   	struct list_head *bt_node, head;
> > @@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
> >   			for (j = 0 ; j < nthreads; j++) {
> >   				if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
> >   					bt_node = &intserv_node->node;
> > +					bt_thread = j;
> >   					found_boot_cpu = true;
> >   					/*
> >   					 * Record the round-shift between dt
> > @@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
> >   	/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
> >   	list_add_tail(&head, bt_node);
> >   	pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
> > +	terminate = nr_cpu_ids;
> >   	list_for_each_entry(intserv_node, &head, node) {
> > +		j = 0;
> 
> > +		/* Choose a start point to cover the boot cpu */
> > +		if (nr_cpu_ids - 1 < bt_thread) {
> > +			/*
> > +			 * The processor core puts assumption on the thread id,
> > +			 * not to breach the assumption.
> > +			 */
> > +			terminate = nr_cpu_ids - 1;
> 
> nthreads is anyway assumed to be same for all cores. So, enforcing
> nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
> make the code much simpler without the need for above check and the
> other complexities addressed in the subsequent patches...
> 

Indeed, this series can be split into two parts, [1-2/5] and [3-5/5].
In [1-2/5], if nr_cpu_ids is smaller than nthreads, it is enforced to be
equal to nthreads. I will make it align upward to nthreads in the next
version. So [1-2/5] can be totally independent from the rest of the
patches in this series.


From an engineer's perspective, [3-5/5] are added to maintain the
nr_cpus semantics. (Finally, nr_cpus=1 can be achieved, but it requires
effort in other subsystems.)


Testing result on my Power9 machine with SMT=4

-1. taskset -c 4 bash -c 'echo c > /proc/sysrq-trigger'

kdump:/# cat /proc/meminfo | grep Percpu
Percpu:              896 kB
kdump:/# cat /sys/devices/system/cpu/possible
0


-2. taskset -c 5 bash -c 'echo c > /proc/sysrq-trigger'

kdump:/# cat /proc/meminfo | grep Percpu
Percpu:             1792 kB
kdump:/# cat /sys/devices/system/cpu/possible
0-1



-3. taskset -c 6 bash -c 'echo c > /proc/sysrq-trigger'

kdump:/# cat /proc/meminfo | grep Percpu
Percpu:             1792 kB
kdump:/# cat /sys/devices/system/cpu/possible
0,2


-4. taskset -c 7 bash -c 'echo c > /proc/sysrq-trigger'

kdump:/# cat /proc/meminfo | grep Percpu
Percpu:             1792 kB
kdump:/# cat /sys/devices/system/cpu/possible
0,3


Thanks,
Pingfan
Hari Bathini Oct. 12, 2023, 5:32 a.m. UTC | #3
On 11/10/23 8:35 am, Pingfan Liu wrote:
> On Tue, Oct 10, 2023 at 01:56:13PM +0530, Hari Bathini wrote:
>>
>>
>> On 09/10/23 5:00 pm, Pingfan Liu wrote:
>>> If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
>>> ensure the boot_cpu is in cpu_present_mask. This can be achieved by
>>> reserving the last quota for the boot cpu.
>>>
>>> Note: the restriction on nr_cpus will be lifted with more effort in the
>>> successive patches
>>>
>>> Signed-off-by: Pingfan Liu <piliu@redhat.com>
>>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>>> Cc: Nicholas Piggin <npiggin@gmail.com>
>>> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
>>> Cc: Wen Xiong <wenxiong@linux.ibm.com>
>>> Cc: Baoquan He <bhe@redhat.com>
>>> Cc: Ming Lei <ming.lei@redhat.com>
>>> Cc: kexec@lists.infradead.org
>>> To: linuxppc-dev@lists.ozlabs.org
>>> ---
>>>    arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
>>>    1 file changed, 22 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
>>> index 81291e13dec0..f9ef0a2666b0 100644
>>> --- a/arch/powerpc/kernel/setup-common.c
>>> +++ b/arch/powerpc/kernel/setup-common.c
>>> @@ -454,8 +454,8 @@ struct interrupt_server_node {
>>>    void __init smp_setup_cpu_maps(void)
>>>    {
>>>    	struct device_node *dn;
>>> -	int shift = 0, cpu = 0;
>>> -	int j, nthreads = 1;
>>> +	int terminate, shift = 0, cpu = 0;
>>> +	int j, bt_thread = 0, nthreads = 1;
>>>    	int len;
>>>    	struct interrupt_server_node *intserv_node, *n;
>>>    	struct list_head *bt_node, head;
>>> @@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
>>>    			for (j = 0 ; j < nthreads; j++) {
>>>    				if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
>>>    					bt_node = &intserv_node->node;
>>> +					bt_thread = j;
>>>    					found_boot_cpu = true;
>>>    					/*
>>>    					 * Record the round-shift between dt
>>> @@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
>>>    	/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
>>>    	list_add_tail(&head, bt_node);
>>>    	pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
>>> +	terminate = nr_cpu_ids;
>>>    	list_for_each_entry(intserv_node, &head, node) {
>>> +		j = 0;
>>
>>> +		/* Choose a start point to cover the boot cpu */
>>> +		if (nr_cpu_ids - 1 < bt_thread) {
>>> +			/*
>>> +			 * The processor core puts assumption on the thread id,
>>> +			 * not to breach the assumption.
>>> +			 */
>>> +			terminate = nr_cpu_ids - 1;
>>
>> nthreads is anyway assumed to be same for all cores. So, enforcing
>> nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
>> make the code much simpler without the need for above check and the
>> other complexities addressed in the subsequent patches...
>>
> 
> Indeed, this series can be splited into two partsk, [1-2/5] and [3-5/5].
> In [1-2/5], if smaller, the nr_cpu_ids is enforced to be equal to
> nthreads. I will make it align upward on nthreads in the next version.
> So [1-2/5] can be totally independent from the rest patches in this
> series.

Yup. Would prefer it that way.

>  From an engineer's perspective, [3-5/5] are added to maintain the
> nr_cpus semantics. (Finally, nr_cpus=1 can be achieved but requiring
> effort on other subsystem)

I understand it would be nice to maintain the semantics, but it is not
worth the complexity it brings, IMHO. So, my suggestion would be to drop
[3-5/5].

Thanks
Hari
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 81291e13dec0..f9ef0a2666b0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -454,8 +454,8 @@  struct interrupt_server_node {
 void __init smp_setup_cpu_maps(void)
 {
 	struct device_node *dn;
-	int shift = 0, cpu = 0;
-	int j, nthreads = 1;
+	int terminate, shift = 0, cpu = 0;
+	int j, bt_thread = 0, nthreads = 1;
 	int len;
 	struct interrupt_server_node *intserv_node, *n;
 	struct list_head *bt_node, head;
@@ -518,6 +518,7 @@  void __init smp_setup_cpu_maps(void)
 			for (j = 0 ; j < nthreads; j++) {
 				if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
 					bt_node = &intserv_node->node;
+					bt_thread = j;
 					found_boot_cpu = true;
 					/*
 					 * Record the round-shift between dt
@@ -537,11 +538,21 @@  void __init smp_setup_cpu_maps(void)
 	/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
 	list_add_tail(&head, bt_node);
 	pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
+	terminate = nr_cpu_ids;
 	list_for_each_entry(intserv_node, &head, node) {
 
+		j = 0;
+		/* Choose a start point to cover the boot cpu */
+		if (nr_cpu_ids - 1 < bt_thread) {
+		/*
+		 * The processor core makes assumptions about the thread
+		 * id layout; do not breach that assumption.
+		 */
+			terminate = nr_cpu_ids - 1;
+		}
 		avail = intserv_node->avail;
 		nthreads = intserv_node->len / sizeof(int);
-		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
+		for (; j < nthreads && cpu < terminate; j++) {
 			set_cpu_present(cpu, avail);
 			set_cpu_possible(cpu, true);
 			cpu_to_phys_id[cpu] = be32_to_cpu(intserv_node->intserv[j]);
@@ -549,6 +560,14 @@  void __init smp_setup_cpu_maps(void)
 			    j, cpu, be32_to_cpu(intserv_node->intserv[j]));
 			cpu++;
 		}
+		/* Make the reserved boot cpu present and possible */
+		if (nr_cpu_ids - 1 < bt_thread) {
+			set_cpu_present(bt_thread, avail);
+			set_cpu_possible(bt_thread, true);
+			cpu_to_phys_id[bt_thread] = be32_to_cpu(intserv_node->intserv[bt_thread]);
+			DBG("    thread %d -> cpu %d (hard id %d)\n",
+			    bt_thread, bt_thread, be32_to_cpu(intserv_node->intserv[bt_thread]));
+		}
 	}
 
 	list_for_each_entry_safe(intserv_node, n, &head, node) {