Patchwork [3/9] ARM: tegra: # of CPU cores detection w/ & w/o HAVE_ARM_SCU

login
register
mail settings
Submitter Hiroshi Doyu
Date Dec. 20, 2012, 9:44 a.m.
Message ID <1355996654-6579-4-git-send-email-hdoyu@nvidia.com>
Download mbox | patch
Permalink /patch/207635/
State Changes Requested, archived
Headers show

Comments

Hiroshi Doyu - Dec. 20, 2012, 9:44 a.m.
The method to detect the number of CPU cores on Cortex-A9 MPCore and
Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
have to read it from the system coprocessor(CP15), because the SCU on
Cortex-A15 MPCore does not have software readable registers. This
patch selects the correct method at runtime based on the CPU ID.

Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
---
 arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)
Felipe Balbi - Dec. 20, 2012, 10:06 a.m.
Hi,

On Thu, Dec 20, 2012 at 11:44:01AM +0200, Hiroshi Doyu wrote:
> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> have to read it from the system coprocessor(CP15), because the SCU on
> Cortex-A15 MPCore does not have software readable registers. This
> patch selects the correct method at runtime based on the CPU ID.
> 
> Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> ---
>  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> index 1b926df..68e76ef 100644
> --- a/arch/arm/mach-tegra/platsmp.c
> +++ b/arch/arm/mach-tegra/platsmp.c
> @@ -23,6 +23,7 @@
>  #include <asm/hardware/gic.h>
>  #include <asm/mach-types.h>
>  #include <asm/smp_scu.h>
> +#include <asm/cputype.h>
>  
>  #include <mach/powergate.h>
>  
> @@ -34,9 +35,13 @@
>  #include "common.h"
>  #include "iomap.h"
>  
> +#define CPU_MASK		0xff0ffff0
> +#define CPU_CORTEX_A9		0x410fc090
> +#define CPU_CORTEX_A15		0x410fc0f0
> +
>  extern void tegra_secondary_startup(void);
>  
> -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
> +static void __iomem *scu_base;
>  
>  #define EVP_CPU_RESET_VECTOR \
>  	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
> @@ -149,7 +154,26 @@ done:
>   */
>  static void __init tegra_smp_init_cpus(void)
>  {
> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> +	unsigned int i, cpu_id, ncores;
> +	u32 l2ctlr;
> +	phys_addr_t pa;
> +
> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> +	switch (cpu_id) {
> +	case CPU_CORTEX_A15:
> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> +		ncores = ((l2ctlr >> 24) & 3) + 1;
> +		break;
> +	case CPU_CORTEX_A9:
> +		/* Get SCU physical base */
> +		asm("mrc p15, 4, %0, c15, c0, 0" : "=r" (pa));
> +		scu_base = IO_ADDRESS(pa);
> +		ncores = scu_get_core_count(scu_base);
> +		break;
> +	default:
> +		BUG();

Instead of bugging out, how about setting ncores to 1?
Marc Zyngier - Dec. 20, 2012, 11:17 a.m.
On 20/12/12 09:44, Hiroshi Doyu wrote:
> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> have to read it from the system coprocessor(CP15), because the SCU on
> Cortex-A15 MPCore does not have software readable registers. This
> patch selects the correct method at runtime based on the CPU ID.
> 
> Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> ---
>  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> index 1b926df..68e76ef 100644
> --- a/arch/arm/mach-tegra/platsmp.c
> +++ b/arch/arm/mach-tegra/platsmp.c
> @@ -23,6 +23,7 @@
>  #include <asm/hardware/gic.h>
>  #include <asm/mach-types.h>
>  #include <asm/smp_scu.h>
> +#include <asm/cputype.h>
>  
>  #include <mach/powergate.h>
>  
> @@ -34,9 +35,13 @@
>  #include "common.h"
>  #include "iomap.h"
>  
> +#define CPU_MASK		0xff0ffff0
> +#define CPU_CORTEX_A9		0x410fc090
> +#define CPU_CORTEX_A15		0x410fc0f0
> +
>  extern void tegra_secondary_startup(void);
>  
> -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
> +static void __iomem *scu_base;
>  
>  #define EVP_CPU_RESET_VECTOR \
>  	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
> @@ -149,7 +154,26 @@ done:
>   */
>  static void __init tegra_smp_init_cpus(void)
>  {
> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> +	unsigned int i, cpu_id, ncores;
> +	u32 l2ctlr;
> +	phys_addr_t pa;
> +
> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> +	switch (cpu_id) {
> +	case CPU_CORTEX_A15:
> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> +		ncores = ((l2ctlr >> 24) & 3) + 1;

Please, do not do that. It doesn't scale to multiple clusters. Instead,
you can now rely on arm_dt_init_cpu_maps() to do the right thing as long
as your device tree exposes all the cpu nodes.

	M.
Hiroshi Doyu - Dec. 20, 2012, 11:21 a.m.
Felipe Balbi <balbi@ti.com> wrote @ Thu, 20 Dec 2012 11:06:25 +0100:
...
> > @@ -149,7 +154,26 @@ done:
> >   */
> >  static void __init tegra_smp_init_cpus(void)
> >  {
> > -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > +	unsigned int i, cpu_id, ncores;
> > +	u32 l2ctlr;
> > +	phys_addr_t pa;
> > +
> > +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > +	switch (cpu_id) {
> > +	case CPU_CORTEX_A15:
> > +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > +		break;
> > +	case CPU_CORTEX_A9:
> > +		/* Get SCU physical base */
> > +		asm("mrc p15, 4, %0, c15, c0, 0" : "=r" (pa));
> > +		scu_base = IO_ADDRESS(pa);
> > +		ncores = scu_get_core_count(scu_base);
> > +		break;
> > +	default:
> > +		BUG();
> 
> instead of bugging out, how about setting ncores to 1 instead ?

Maybe that would be useful in the case of adding a new ARM core in the
future.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hiroshi Doyu - Dec. 20, 2012, 11:26 a.m.
Marc Zyngier <marc.zyngier@arm.com> wrote @ Thu, 20 Dec 2012 12:17:08 +0100:

> On 20/12/12 09:44, Hiroshi Doyu wrote:
> > The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > have to read it from the system coprocessor(CP15), because the SCU on
> > Cortex-A15 MPCore does not have software readable registers. This
> > patch selects the correct method at runtime based on the CPU ID.
> > 
> > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> > ---
> >  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
> >  1 file changed, 28 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> > index 1b926df..68e76ef 100644
> > --- a/arch/arm/mach-tegra/platsmp.c
> > +++ b/arch/arm/mach-tegra/platsmp.c
> > @@ -23,6 +23,7 @@
> >  #include <asm/hardware/gic.h>
> >  #include <asm/mach-types.h>
> >  #include <asm/smp_scu.h>
> > +#include <asm/cputype.h>
> >  
> >  #include <mach/powergate.h>
> >  
> > @@ -34,9 +35,13 @@
> >  #include "common.h"
> >  #include "iomap.h"
> >  
> > +#define CPU_MASK		0xff0ffff0
> > +#define CPU_CORTEX_A9		0x410fc090
> > +#define CPU_CORTEX_A15		0x410fc0f0
> > +
> >  extern void tegra_secondary_startup(void);
> >  
> > -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
> > +static void __iomem *scu_base;
> >  
> >  #define EVP_CPU_RESET_VECTOR \
> >  	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
> > @@ -149,7 +154,26 @@ done:
> >   */
> >  static void __init tegra_smp_init_cpus(void)
> >  {
> > -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > +	unsigned int i, cpu_id, ncores;
> > +	u32 l2ctlr;
> > +	phys_addr_t pa;
> > +
> > +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > +	switch (cpu_id) {
> > +	case CPU_CORTEX_A15:
> > +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > +		ncores = ((l2ctlr >> 24) & 3) + 1;
> 
> Please, do not do that. It doesn't scale to multiple clusters. Instead,
> you can now rely on arm_dt_init_cpu_maps() to do the right thing as long
> as your device tree exposes all the cpu nodes.

OK, could this be the reason why the DT for cpus is necessary even when
the core count can be obtained from CP15?

http://lists.infradead.org/pipermail/linux-arm-kernel/2012-December/137989.html
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier - Dec. 20, 2012, 11:32 a.m.
On 20/12/12 11:26, Hiroshi Doyu wrote:
> Marc Zyngier <marc.zyngier@arm.com> wrote @ Thu, 20 Dec 2012 12:17:08 +0100:
> 
>> On 20/12/12 09:44, Hiroshi Doyu wrote:
>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
>>> have to read it from the system coprocessor(CP15), because the SCU on
>>> Cortex-A15 MPCore does not have software readable registers. This
>>> patch selects the correct method at runtime based on the CPU ID.
>>>
>>> Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
>>> ---
>>>  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
>>>  1 file changed, 28 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
>>> index 1b926df..68e76ef 100644
>>> --- a/arch/arm/mach-tegra/platsmp.c
>>> +++ b/arch/arm/mach-tegra/platsmp.c
>>> @@ -23,6 +23,7 @@
>>>  #include <asm/hardware/gic.h>
>>>  #include <asm/mach-types.h>
>>>  #include <asm/smp_scu.h>
>>> +#include <asm/cputype.h>
>>>  
>>>  #include <mach/powergate.h>
>>>  
>>> @@ -34,9 +35,13 @@
>>>  #include "common.h"
>>>  #include "iomap.h"
>>>  
>>> +#define CPU_MASK		0xff0ffff0
>>> +#define CPU_CORTEX_A9		0x410fc090
>>> +#define CPU_CORTEX_A15		0x410fc0f0
>>> +
>>>  extern void tegra_secondary_startup(void);
>>>  
>>> -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
>>> +static void __iomem *scu_base;
>>>  
>>>  #define EVP_CPU_RESET_VECTOR \
>>>  	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
>>> @@ -149,7 +154,26 @@ done:
>>>   */
>>>  static void __init tegra_smp_init_cpus(void)
>>>  {
>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
>>> +	unsigned int i, cpu_id, ncores;
>>> +	u32 l2ctlr;
>>> +	phys_addr_t pa;
>>> +
>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
>>> +	switch (cpu_id) {
>>> +	case CPU_CORTEX_A15:
>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
>>
>> Please, do not do that. It doesn't scale to multiple clusters. Instead,
>> you can now rely on arm_dt_init_cpu_maps() to do the right thing as long
>> as your device tree exposes all the cpu nodes.
> 
> Ok, this could be the answer why DT for cpu is necessary even when
> it's got from CP15?
> 
> http://lists.infradead.org/pipermail/linux-arm-kernel/2012-December/137989.html
> 

Indeed, there is no unified way to find out how many CPUs are present in
a system. The A15 trick is neat (we used it in the past), but doesn't
scale to multiple clusters, not to mention heterogeneous clusters.

Hence the DT bindings for cpus, which neatly solve this problem and are
future-proof.

	M.
Felipe Balbi - Dec. 20, 2012, 6:18 p.m.
Hi,

On Thu, Dec 20, 2012 at 12:21:36PM +0100, Hiroshi Doyu wrote:
> Felipe Balbi <balbi@ti.com> wrote @ Thu, 20 Dec 2012 11:06:25 +0100:
> ...
> > > @@ -149,7 +154,26 @@ done:
> > >   */
> > >  static void __init tegra_smp_init_cpus(void)
> > >  {
> > > -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > > +	unsigned int i, cpu_id, ncores;
> > > +	u32 l2ctlr;
> > > +	phys_addr_t pa;
> > > +
> > > +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > > +	switch (cpu_id) {
> > > +	case CPU_CORTEX_A15:
> > > +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > > +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > > +		break;
> > > +	case CPU_CORTEX_A9:
> > > +		/* Get SCU physical base */
> > > +		asm("mrc p15, 4, %0, c15, c0, 0" : "=r" (pa));
> > > +		scu_base = IO_ADDRESS(pa);
> > > +		ncores = scu_get_core_count(scu_base);
> > > +		break;
> > > +	default:
> > > +		BUG();
> > 
> > instead of bugging out, how about setting ncores to 1 instead ?
> 
> Maybe that would be useful in the case of adding new ARM core in the
> future.

Right, the kernel would at least boot in that case.

Patch

diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index 1b926df..68e76ef 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -23,6 +23,7 @@ 
 #include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/smp_scu.h>
+#include <asm/cputype.h>
 
 #include <mach/powergate.h>
 
@@ -34,9 +35,13 @@ 
 #include "common.h"
 #include "iomap.h"
 
+#define CPU_MASK		0xff0ffff0
+#define CPU_CORTEX_A9		0x410fc090
+#define CPU_CORTEX_A15		0x410fc0f0
+
 extern void tegra_secondary_startup(void);
 
-static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
+static void __iomem *scu_base;
 
 #define EVP_CPU_RESET_VECTOR \
 	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
@@ -149,7 +154,26 @@  done:
  */
 static void __init tegra_smp_init_cpus(void)
 {
-	unsigned int i, ncores = scu_get_core_count(scu_base);
+	unsigned int i, cpu_id, ncores;
+	u32 l2ctlr;
+	phys_addr_t pa;
+
+	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
+	switch (cpu_id) {
+	case CPU_CORTEX_A15:
+		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+		ncores = ((l2ctlr >> 24) & 3) + 1;
+		break;
+	case CPU_CORTEX_A9:
+		/* Get SCU physical base */
+		asm("mrc p15, 4, %0, c15, c0, 0" : "=r" (pa));
+		scu_base = IO_ADDRESS(pa);
+		ncores = scu_get_core_count(scu_base);
+		break;
+	default:
+		BUG();
+		break;
+	}
 
 	if (ncores > nr_cpu_ids) {
 		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
@@ -166,7 +190,8 @@  static void __init tegra_smp_init_cpus(void)
 static void __init tegra_smp_prepare_cpus(unsigned int max_cpus)
 {
 	tegra_cpu_reset_handler_init();
-	scu_enable(scu_base);
+	if (scu_base)
+		scu_enable(scu_base);
 }
 
 struct smp_operations tegra_smp_ops __initdata = {