Patchwork powerpc: update ibm,client-architecture

login
register
mail settings
Submitter jschopp@austin.ibm.com
Date Dec. 18, 2009, 9:07 p.m.
Message ID <1261170452.8134.13.camel@jschopp-laptop>
Download mbox | patch
Permalink /patch/41430/
State Changes Requested
Headers show

Comments

jschopp@austin.ibm.com - Dec. 18, 2009, 9:07 p.m.
In order to boot with more than 64 cores on machines that support the ibm,client-architecture RTAS call, a
new field has been added to the structure.  This patch updates that field and adds a few others in the
process.  It would be good if this could go in as a bugfix.

Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
Michael Neuling - Dec. 20, 2009, 11:59 p.m.
> Subject: Re: [PATCH] powerpc: update ibm,client-architecture

Please give this a more appropriate name.  

> In order to boot with more than 64 cores on machines that support the
> ibm,client-architecture RTAS call a new field has been added to the
> structure.  This patch updates that field and adds a few others in the
> process.

Please detail what these are here.

> It would be good if this could go in as a bugfix.  
> 
> Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
> 
> Index: linux-2.6.git/arch/powerpc/kernel/prom_init.c
> ===================================================================
> --- linux-2.6.git.orig/arch/powerpc/kernel/prom_init.c
> +++ linux-2.6.git/arch/powerpc/kernel/prom_init.c
> @@ -654,6 +654,9 @@ static void __init early_cmdline_parse(v
>  #define OV5_CMO			0x00
>  #endif
>  
> +/* Option Vector 6: IBM PAPR hints */
> +#define OV6_LINUX		0x02	/* Linux is our OS */
> +
>  /*
>   * The architecture vector has an array of PVR mask/value pairs,
>   * followed by # option vectors - 1, followed by the option vectors.
> @@ -665,7 +668,7 @@ static unsigned char ibm_architecture_ve
>  	W(0xffffffff), W(0x0f000003),	/* all 2.06-compliant */
>  	W(0xffffffff), W(0x0f000002),	/* all 2.05-compliant */
>  	W(0xfffffffe), W(0x0f000001),	/* all 2.04-compliant and earlier */
> -	5 - 1,				/* 5 option vectors */
> +	6 - 1,				/* 6 option vectors */
>  
>  	/* option vector 1: processor architectures supported */
>  	3 - 2,				/* length */
> @@ -697,12 +700,26 @@ static unsigned char ibm_architecture_ve
>  	0,				/* don't halt */
>  
>  	/* option vector 5: PAPR/OF options */
> -	5 - 2,				/* length */
> +	13 - 2,				/* length */
>  	0,				/* don't ignore, don't halt */
>  	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
>  	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
>  	0,
>  	OV5_CMO,
> +	0,				/* reserved */
> +	0,				/* reserved */
> +	0,				/* reserved */
> +	0,				/* reserved */
> +	W(NR_CPUS/4),			/* max cores supported */

4?

> +
> +	/* option vector 6: IBM PAPR hints */
> +	4 - 2,

Please put a comment next to these to say what it is.  

> +	0,				/* reserved */
> +	0,				/* 1 == no secondary pteg */
> +	OV6_LINUX,
> +
> +	/* option vector 7: IBM PAPR OS identification */
> +	/* a human readable ascii string will go here */

Can we do this now, or else remove the comment?  Maybe use UTS_RELEASE or
something like that.

Mikey

>  };
>  
>  /* Old method - ELF header with PT_NOTE sections */
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
Tony Breeds - Dec. 21, 2009, 12:44 a.m.
On Fri, Dec 18, 2009 at 03:07:32PM -0600, Joel Schopp wrote:
> In order to boot with more than 64 cores on machines that support the
> ibm,client-architecture RTAS call a new field has been added to the
> structure.  This patch updates that field and adds a few others in the
> process.  It would be good if this could go in as a bugfix.  

But it's not really, is it?  What does it fix?

> Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
> 
> Index: linux-2.6.git/arch/powerpc/kernel/prom_init.c
> ===================================================================
> --- linux-2.6.git.orig/arch/powerpc/kernel/prom_init.c
> +++ linux-2.6.git/arch/powerpc/kernel/prom_init.c
> @@ -654,6 +654,9 @@ static void __init early_cmdline_parse(v
>  #define OV5_CMO			0x00
>  #endif
>  
> +/* Option Vector 6: IBM PAPR hints */
> +#define OV6_LINUX		0x02	/* Linux is our OS */
> +
>  /*
>   * The architecture vector has an array of PVR mask/value pairs,
>   * followed by # option vectors - 1, followed by the option vectors.
> @@ -665,7 +668,7 @@ static unsigned char ibm_architecture_ve
>  	W(0xffffffff), W(0x0f000003),	/* all 2.06-compliant */
>  	W(0xffffffff), W(0x0f000002),	/* all 2.05-compliant */
>  	W(0xfffffffe), W(0x0f000001),	/* all 2.04-compliant and earlier */
> -	5 - 1,				/* 5 option vectors */
> +	6 - 1,				/* 6 option vectors */
>  
>  	/* option vector 1: processor architectures supported */
>  	3 - 2,				/* length */
> @@ -697,12 +700,26 @@ static unsigned char ibm_architecture_ve
>  	0,				/* don't halt */
>  
>  	/* option vector 5: PAPR/OF options */
> -	5 - 2,				/* length */
> +	13 - 2,				/* length */
>  	0,				/* don't ignore, don't halt */
>  	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
>  	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
>  	0,
>  	OV5_CMO,
> +	0,				/* reserved */
> +	0,				/* reserved */
> +	0,				/* reserved */
> +	0,				/* reserved */

Not so much reserved, as unused by us.

> +	W(NR_CPUS/4),			/* max cores supported */

4?  I can see a case for 2 or just plain NR_CPUS, but 4 is wrong.

> +	/* option vector 6: IBM PAPR hints */
> +	4 - 2,

We all know this is a length, but please follow the style in this structure.

> +	0,				/* reserved */
> +	0,				/* 1 == no secondary pteg */

I think the "1 == " confuses things.

> +	OV6_LINUX,

Does filling in the vector actually let us boot on systems where we do not already?

> +
> +	/* option vector 7: IBM PAPR OS identification */
> +	/* a human readable ascii string will go here */

Either fill it in or leave the comments out.  

Yours Tony
jschopp@austin.ibm.com - Dec. 21, 2009, 6:15 p.m.
>> Subject: Re: [PATCH] powerpc: update ibm,client-architecture
>>     
>
> Please give this a more appropriate name.  
>   
Any suggestions?
>   
>> In order to boot with more than 64 cores on machines that support the
>> ibm,client-architecture RTAS call a new field has been added to the
>> structure.  This patch updates that field and adds a few others in the
>> process.
>>     
>
> Please detail what these are here.
>   
OK.
>> +	W(NR_CPUS/4),			/* max cores supported */
>>     
>
> 4?
>
>   
4 is the new 2.  Since you don't know the actual threads per core at 
this point in boot you have to be conservative and go with the maximum 
number of any processor.  See page 4 of these charts:
http://www.power.org/events/powercon09/taiwan09/IBM_Overview_POWER7.pdf
>
>
> Can we do this now or remove the comment.  Maybe UTS_RELEASE or
> something like that.
>   
I'll just remove the comment for now.
jschopp@austin.ibm.com - Dec. 21, 2009, 6:22 p.m.
Tony Breeds wrote:
> On Fri, Dec 18, 2009 at 03:07:32PM -0600, Joel Schopp wrote:
>   
>> In order to boot with more than 64 cores on machines that support the
>> ibm,client-architecture RTAS call a new field has been added to the
>> structure.  This patch updates that field and adds a few others in the
>> process.  It would be good if this could go in as a bugfix.  
>>     
>
> But it's not really is it? What does it fix?
>   
It fixes kernels compiled with NR_CPUS where NR_CPUS > 256. 
>
>>  	/* option vector 5: PAPR/OF options */
>> -	5 - 2,				/* length */
>> +	13 - 2,				/* length */
>>  	0,				/* don't ignore, don't halt */
>>  	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
>>  	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
>>  	0,
>>  	OV5_CMO,
>> +	0,				/* reserved */
>> +	0,				/* reserved */
>> +	0,				/* reserved */
>> +	0,				/* reserved */
>>     
>
> Not so much reserved, as unused by us.
>   
They were all reserved until recently, and the last 2 of the 4 are still 
reserved, while we just don't use the first two yet.  I'll remove the 
comments from the first 2.

>   
>> +	W(NR_CPUS/4),			/* max cores supported */
>>     
>
> 4?  I can see a case for 2 or just plain NR_CPUS, but 4 is wrong.
>   
4 is the new 2.  Since the actual threads per core is unknown at this 
point in boot you have to be conservative and go with the maximum number 
of any processor.  See page 4 of these charts:
http://www.power.org/events/powercon09/taiwan09/IBM_Overview_POWER7.pdf
>   
>> +	/* option vector 6: IBM PAPR hints */
>> +	4 - 2,
>>     
>
> We all know this is a length, but please follow the style in this structure.
>   
OK.
>   
>> +	0,				/* reserved */
>> +	0,				/* 1 == no secondary pteg */
>>     
>
> I think the "1 == " confuses things.
>   
I'll remove it then.
>   
>> +	OV6_LINUX,
>>     
>
> Does filling in the vector actually let us boot on systems where we do not already?
>   
Will add description to the email body on this one.
>   
>> +
>> +	/* option vector 7: IBM PAPR OS identification */
>> +	/* a human readable ascii string will go here */
>>     
>
> Either fill it in or leave the comments out.  
>   
I'll leave the comment out.
Michael Neuling - Dec. 21, 2009, 9:12 p.m.
> >> Subject: Re: [PATCH] powerpc: update ibm,client-architecture
> >>     
> >
> > Please give this a more appropriate name.  
> >   
> Any suggestions?

Something that's more descriptive of what the patch does.  Say "Add
max CPU nodes field to ibm,client-architecture call"

> >   
> >> In order to boot with more than 64 cores on machines that support the
> >> ibm,client-architecture RTAS call a new field has been added to the
> >> structure.  This patch updates that field and adds a few others in the
> >> process.
> >>     
> >
> > Please detail what these are here.
> >   
> OK.
> >> +	W(NR_CPUS/4),			/* max cores supported */

FYI reading the PAPR, this comment should technically be "max 'cpu'
nodes presented".

> >>     
> >
> > 4?
> >
> >   
> 4 is the new 2.  

I'd still be asking what 2 is.  It needs a #define to make it clearer
what you are doing.

> Since you don't know the actual threads per core at 
> this point in boot you have to be conservative and go with the maximum 
> number of any processor.  See page 4 of these charts:
> http://www.power.org/events/powercon09/taiwan09/IBM_Overview_POWER7.pdf

I don't think hard wiring 4 in here is right. If we are booting a
machine with SMT2, we will put only half the number of cores that we can
handle in this field.  This is going to break a lot of machines where
people have compiled with NR_CPUS = thread number.

I think you just want to put NR_CPUS here.  

Mikey

> >
> >
> > Can we do this now or remove the comment.  Maybe UTS_RELEASE or
> > something like that.
> >   
> I'll just remove the comment for now.
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
jschopp@austin.ibm.com - Dec. 21, 2009, 10:14 p.m.
>> OK.
>>     
>>>> +	W(NR_CPUS/4),			/* max cores supported */
>>>>         
>
> FYI reading the PAPR, this comment should technically be "max 'cpu'
> nodes presented".
>   
I applied a disambiguation filter to the comment since cpus can mean a 
lot of things these days (i.e. hardware threads, cores, chips), but a 
core is a core.

>> 4 is the new 2.  
>>     
>
> I'd still be asking what 2 is.  It's needs a #define to make clearer
> what you are doing.
>
>   
I'll add a #define
>> Since you don't know the actual threads per core at 
>> this point in boot you have to be conservative and go with the maximum 
>> number of any processor.  See page 4 of these charts:
>> http://www.power.org/events/powercon09/taiwan09/IBM_Overview_POWER7.pdf
>>     
>
> I don't think hard wiring 4 in here is right. If we are booting a
> machine with SMT2, we will put only half the number of cores that we can
> handle in this field.  This is going to break a lot of machines where
> people have compiled with NR_CPUS = thread number.
>
> I think you just want to put NR_CPUS here.  
>
>   
It's a bad interface.  No matter what you choose there will be a 
downside.  1) If you choose NR_CPUS, the best case of how many you could 
boot without SMT, then when you boot with SMT2 or SMT4 you can get 
assigned more cpus than you can boot.  2) If you choose NR_CPUS/4, the 
worst case of how many you could boot, and you get a large machine with 
SMT2 or SMT1 you might have said you support fewer cpus than you actually 
do and thus not boot all the cpus.  So no matter what you choose you 
could be not booting cpus in some theoretical scenario.
Tony Breeds - Dec. 22, 2009, 12:18 a.m.
On Mon, Dec 21, 2009 at 12:22:09PM -0600, Joel Schopp wrote:

> 4 is the new 2.  Since the actual threads per core is unknown at
> this point in boot you have to be conservative and go with the
> maximum number of any processor.  See page 4 of these charts:
> http://www.power.org/events/powercon09/taiwan09/IBM_Overview_POWER7.pdf

Sure P7 /can/ be 4-way SMT, but consider the case where you know you have 128
threads so you set NR_CPUS to 128, then you boot with "max cores" of 32, so you
lose half of your threads.  I guess that's only a problem when you build your
own kernels, distros are probably setting NR_CPUS high enough to cover all SMT2
systems anyway.

I guess if the patch only set the "max cores" and described why, I'd (FWIW :D)
be more comfortable.

Yours Tony
Michael Neuling - Dec. 22, 2009, 12:33 a.m.
>>> OK.
>>>     
>>>>> +	W(NR_CPUS/4),			/* max cores supported */
>>>>>         
>>
>> FYI reading the PAPR, this comment should technically be "max 'cpu'
>> nodes presented".
>>   
>I applied a disambiguation filter to the comment since cpus can mean a 
>lot of things these days ( ie hardware threads, cores, chips) , but a 
>core is a core.

I think you applied an ambiguation filter, actually :-)

The PAPR is clear and perfectly understandable in this context.  It's
"OF device tree nodes of type 'cpu'".

>>> 4 is the new 2.  
>>>     
>>
>> I'd still be asking what 2 is.  It's needs a #define to make clearer
>> what you are doing.
>>
>>   
>I'll add a #define
>>> Since you don't know the actual threads per core at 
>>> this point in boot you have to be conservative and go with the maximum 
>>> number of any processor.  See page 4 of these charts:
>>> http://www.power.org/events/powercon09/taiwan09/IBM_Overview_POWER7.pdf
>>>     
>>
>> I don't think hard wiring 4 in here is right. If we are booting a
>> machine with SMT2, we will put only half the number of cores that we can
>> handle in this field.  This is going to break a lot of machines where
>> people have compiled with NR_CPUS = thread number.
>>
>> I think you just want to put NR_CPUS here.  
>>
>>   
>It's a bad interface.  No matter what you choose there will be a 
>downside.  1) If you choose NR_CPUS, the best case of how many you could 
>boot without SMT, then when you boot with SMT2 or SMT4 you can get 
>assigned more cpus than you can boot.  2) If you choose NR_CPUS/4, the 
>worst case of how many you could boot, and you get a large machine with 
>SMT2 or SMT1 you might have said you support less cpus than you actually 
>do and thus not boot all the cpus.  So no matter what you choose you 
>could be not booting cpus in some theoretical scenario. 

Sure but 1) is how Linux behaves now (we can always get more cpus than
we can handle) and 2) is what you are changing it to.  2) is going to
break existing users.

Anyway, if you are intending to change functionality like this, please
make it clear in the check in comment.  

Mikey
Tony Breeds - Dec. 22, 2009, 12:58 a.m.
On Mon, Dec 21, 2009 at 04:14:54PM -0600, Joel Schopp wrote:

> It's a bad interface.  No matter what you choose there will be a
> downside.  1) If you choose NR_CPUS, the best case of how many you
> could boot without SMT, then when you boot with SMT2 or SMT4 you can
> get assigned more cpus than you can boot.  2) If you choose
> NR_CPUS/4, the worst case of how many you could boot, and you get a
> large machine with SMT2 or SMT1 you might have said you support less
> cpus than you actually do and thus not boot all the cpus.  So no
> matter what you choose you could be not booting cpus in some
> theoretical scenario.

We're far enough through boot to determine the threading model, so you /could/
work out what SMT we're in and divide NR_CPUS by that and give that to firmware.

Yours Tony

Patch

Index: linux-2.6.git/arch/powerpc/kernel/prom_init.c
===================================================================
--- linux-2.6.git.orig/arch/powerpc/kernel/prom_init.c
+++ linux-2.6.git/arch/powerpc/kernel/prom_init.c
@@ -654,6 +654,9 @@  static void __init early_cmdline_parse(v
 #define OV5_CMO			0x00
 #endif
 
+/* Option Vector 6: IBM PAPR hints */
+#define OV6_LINUX		0x02	/* Linux is our OS */
+
 /*
  * The architecture vector has an array of PVR mask/value pairs,
  * followed by # option vectors - 1, followed by the option vectors.
@@ -665,7 +668,7 @@  static unsigned char ibm_architecture_ve
 	W(0xffffffff), W(0x0f000003),	/* all 2.06-compliant */
 	W(0xffffffff), W(0x0f000002),	/* all 2.05-compliant */
 	W(0xfffffffe), W(0x0f000001),	/* all 2.04-compliant and earlier */
-	5 - 1,				/* 5 option vectors */
+	6 - 1,				/* 6 option vectors */
 
 	/* option vector 1: processor architectures supported */
 	3 - 2,				/* length */
@@ -697,12 +700,26 @@  static unsigned char ibm_architecture_ve
 	0,				/* don't halt */
 
 	/* option vector 5: PAPR/OF options */
-	5 - 2,				/* length */
+	13 - 2,				/* length */
 	0,				/* don't ignore, don't halt */
 	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
 	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
 	0,
 	OV5_CMO,
+	0,				/* reserved */
+	0,				/* reserved */
+	0,				/* reserved */
+	0,				/* reserved */
+	W(NR_CPUS/4),			/* max cores supported */
+
+	/* option vector 6: IBM PAPR hints */
+	4 - 2,
+	0,				/* reserved */
+	0,				/* 1 == no secondary pteg */
+	OV6_LINUX,
+
+	/* option vector 7: IBM PAPR OS identification */
+	/* a human readable ascii string will go here */
 };
 
 /* Old method - ELF header with PT_NOTE sections */