diff mbox

[RFC] ARM: BCM5301X: Implement SMP support

Message ID 1423600375-18665-1-git-send-email-zajec5@gmail.com
State New
Headers show

Commit Message

Rafał Miłecki Feb. 10, 2015, 8:32 p.m. UTC
Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
---
 arch/arm/boot/dts/bcm4708.dtsi       |   1 +
 arch/arm/mach-bcm/Makefile           |   3 +
 arch/arm/mach-bcm/bcm5301x_headsmp.S | 108 ++++++++++++++++++++
 arch/arm/mach-bcm/bcm5301x_smp.c     | 185 +++++++++++++++++++++++++++++++++++
 4 files changed, 297 insertions(+)
 create mode 100644 arch/arm/mach-bcm/bcm5301x_headsmp.S
 create mode 100644 arch/arm/mach-bcm/bcm5301x_smp.c

Comments

Hauke Mehrtens Feb. 13, 2015, 11:54 a.m. UTC | #1
Thanks for working on this.

Someone else with more knowledge about arm cortex A9 SMP stuff should
look at this patch.

Did you have a look at mach-rockchip/platsmp.c? While I was looking at
SMP stuff this code looked clean to me and they are also using a Cortex A9.

There are some comments in the code.

Hauke

On 02/10/2015 09:32 PM, Rafał Miłecki wrote:
> Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
> ---
>  arch/arm/boot/dts/bcm4708.dtsi       |   1 +
>  arch/arm/mach-bcm/Makefile           |   3 +
>  arch/arm/mach-bcm/bcm5301x_headsmp.S | 108 ++++++++++++++++++++
>  arch/arm/mach-bcm/bcm5301x_smp.c     | 185 +++++++++++++++++++++++++++++++++++
>  4 files changed, 297 insertions(+)
>  create mode 100644 arch/arm/mach-bcm/bcm5301x_headsmp.S
>  create mode 100644 arch/arm/mach-bcm/bcm5301x_smp.c
> 
> diff --git a/arch/arm/boot/dts/bcm4708.dtsi b/arch/arm/boot/dts/bcm4708.dtsi
> index 31141e8..ed4ddba 100644
> --- a/arch/arm/boot/dts/bcm4708.dtsi
> +++ b/arch/arm/boot/dts/bcm4708.dtsi
> @@ -15,6 +15,7 @@
>  	cpus {
>  		#address-cells = <1>;
>  		#size-cells = <0>;
> +		enable-method = "brcm,bcm4708-smp";
>  
>  		cpu@0 {
>  			device_type = "cpu";
> diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile
> index 4c38674..ca12727 100644
> --- a/arch/arm/mach-bcm/Makefile
> +++ b/arch/arm/mach-bcm/Makefile
> @@ -36,6 +36,9 @@ obj-$(CONFIG_ARCH_BCM2835)	+= board_bcm2835.o
>  
>  # BCM5301X
>  obj-$(CONFIG_ARCH_BCM_5301X)	+= bcm_5301x.o
> +ifeq ($(CONFIG_SMP),y)
> +obj-$(CONFIG_ARCH_BCM_5301X)	+= bcm5301x_smp.o bcm5301x_headsmp.o
> +endif
>  
>  # BCM63XXx
>  obj-$(CONFIG_ARCH_BCM_63XX)	:= bcm63xx.o
> diff --git a/arch/arm/mach-bcm/bcm5301x_headsmp.S b/arch/arm/mach-bcm/bcm5301x_headsmp.S
> new file mode 100644
> index 0000000..e8df65f
> --- /dev/null
> +++ b/arch/arm/mach-bcm/bcm5301x_headsmp.S
> @@ -0,0 +1,108 @@
> +/*
> + * Broadcom BCM470X / BCM5301X ARM platform code.
> + *
> + * Copyright 2003 - 2008 Broadcom Corporation
> + *
> + * Licensed under the GNU/GPL. See COPYING for details.
> + */
> +
> +#include <asm/memory.h>
> +
> +#include <linux/linkage.h>
> +#include <linux/init.h>
> +
> +#define __virt_to_phys(x)	((x) - PAGE_OFFSET)
> +
> +/*
> + * v7_l1_cache_invalidate
> + *
> + * Invalidate contents of L1 cache without flushing its contents
> + * into outer cache and memory. This is needed when the contents
> + * of the cache are unpredictable after power-up.
> + *
> + * corrupts r0-r6
> + */
> +ENTRY(v7_l1_cache_invalidate)
> +	mov	r0, #0
> +	mcr	p15, 2, r0, c0, c0, 0	@ set cache level to 1
> +	mrc	p15, 1, r0, c0, c0, 0	@ read CLIDR
> +
> +	ldr	r1, =0x7fff
> +	and	r2, r1, r0, lsr #13	@ get max # of index size
> +
> +	ldr	r1, =0x3ff
> +	and	r3, r1, r0, lsr #3	@ NumWays - 1
> +	add	r2, r2, #1		@ NumSets
> +
> +	and	r0, r0, #0x7
> +	add	r0, r0, #4		@ SetShift
> +
> +	clz	r1, r3			@ WayShift
> +	add	r4, r3, #1		@ NumWays
> +1:	sub	r2, r2, #1		@ NumSets--
> +	mov	r3, r4			@ Temp = NumWays
> +2:	subs	r3, r3, #1		@ Temp--
> +	mov	r5, r3, lsl r1
> +	mov	r6, r2, lsl r0
> +	orr	r5, r5, r6		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
> +	mcr	p15, 0, r5, c7, c6, 2	@ Invalidate line
> +	bgt	2b
> +	cmp	r2, #0
> +	bgt	1b
> +	dsb
> +	mov	r0,#0
> +	mcr	p15,0,r0,c7,c5,0	/* Invalidate icache */
> +	isb
> +	mov	pc, lr
> +ENDPROC(v7_l1_cache_invalidate)

This function looks similar to v7_invalidate_l1 and v7_flush_icache_all
in arch/arm/mm/cache-v7.S. If it is different and that is intended, could
you please point it out in the comment?

> +/*
> + * Platform specific entry point for secondary CPUs.  This
> + * provides a "holding pen" into which all secondary cores are held
> + * until we're ready for them to initialise.
> + */
> +	__CPUINIT
> +ENTRY(bcm5301x_secondary_startup)
> +	/*
> +	 * Get hardware CPU id of ours
> +	 */
> +	mrc	p15, 0, r0, c0, c0, 5
> +	and	r0, r0, #15
> +	/*
> +	 * Wait on <pen_release> variable by physical address
> +	 * to contain our hardware CPU id
> +	 */
> +#ifdef CONFIG_SPARSEMEM
> +	ldr	r2, =(PAGE_OFFSET+SZ_128M)
> +	ldr	r1, =pen_release
> +	cmp	r1, r2
> +	bge	1f
> +	ldr	r2, =PAGE_OFFSET
> +	sub	r6, r1, r2
> +	b	2f
> +1:
> +	sub	r1, r1, r2
> +	ldr	r2, =PHYS_OFFSET2
> +	add	r6, r1, r2
> +2:
> +#else
> +	ldr	r6, =__virt_to_phys(pen_release)
> +#endif
> +pen:	ldr	r7, [r6]
> +	cmp	r7, r0
> +	bne	pen
> +	nop
> +	/*
> +	 * In case L1 cache has unpredictable contents at power-up
> +	 * clean its contents without flushing.
> +	 */
> +	bl	v7_l1_cache_invalidate
> +	nop
> +	/*
> +	 * we've been released from the holding pen: secondary_stack
> +	 * should now contain the SVC stack for this core
> +	 */
> +	b	secondary_startup
> +
> +ENDPROC(bcm5301x_secondary_startup)
> +	.ltorg
> diff --git a/arch/arm/mach-bcm/bcm5301x_smp.c b/arch/arm/mach-bcm/bcm5301x_smp.c
> new file mode 100644
> index 0000000..1a173ec
> --- /dev/null
> +++ b/arch/arm/mach-bcm/bcm5301x_smp.c
> @@ -0,0 +1,185 @@
> +/*
> + * Broadcom BCM470X / BCM5301X ARM platform code.
> + *
> + * Copyright (C) 2002 ARM Ltd.
> + * Copyright (C) 2015 Rafał Miłecki <zajec5@gmail.com>
> + *
> + * Licensed under the GNU/GPL. See COPYING for details.
> + */
> +
> +#include <asm/cacheflush.h>
> +#include <asm/delay.h>
> +#include <asm/smp_scu.h>
> +
> +#include <linux/clockchips.h>
> +
> +/*
> + * There is a 1KB LUT located at 0xFFFF0400-0xFFFFFFFF, and its first entry
> + * is where the secondary entry point needs to be written
> +*/
> +#define SOC_ROM_BASE_PA		0xffff0000
> +#define SOC_ROM_LUT_OFF		0x400

Shouldn't this be given through device tree?

> +
> +/* ENTRY in bcm5301x_headsmp.S */
> +extern void bcm5301x_secondary_startup(void);

Shouldn't this go into some common header? I think we do not have such a
header, so it is not that much of a problem for now.

> +
> +static DEFINE_SPINLOCK(boot_lock);
> +
> +static void __cpuinit write_pen_release(int val)
> +{
> +	pen_release = val;
> +	/* Make sure this store is visible to other CPUs */
> +	smp_wmb();
> +	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
> +	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
> +}
> +
> +static void __init bcm5301x_smp_secondary_set_entry(void (*entry_point)(void))
> +{
> +	void __iomem *rombase = NULL;
> +	phys_addr_t lut_pa;
> +	u32 offset, mask;
> +	u32 val;
> +
> +	mask = (1UL << PAGE_SHIFT) - 1;
> +
> +	lut_pa = SOC_ROM_BASE_PA & ~mask;
> +	offset = SOC_ROM_BASE_PA &  mask;
> +	offset += SOC_ROM_LUT_OFF;
> +
> +	rombase = ioremap(lut_pa, PAGE_SIZE);
> +	if (!rombase)
> +		return;
> +	val = virt_to_phys(entry_point);
> +
> +	writel(val, rombase + offset);
> +
> +	smp_wmb();	/* probably not needed - io regs are not cached */
> +	dsb_sev();	/* Exit WFI */
> +	mb();
> +
> +	iounmap(rombase);
> +}
> +
> +static void __init bcm5301x_smp_prepare_cpus(unsigned int max_cpus)
> +{
> +	void __iomem *scu_base;
> +	unsigned int ncores;
> +
> +	if (!scu_a9_has_base()) {
> +		pr_warn("Unknown SCU base\n");
> +		return;
> +	}
> +
> +	scu_base = ioremap((phys_addr_t)scu_a9_get_base(), SZ_256);
> +	if (!scu_base) {
> +		pr_err("Failed to remap SCU\n");
> +		return;
> +	}
> +
> +	ncores = scu_get_core_count(scu_base);
> +	if (max_cpus > ncores) {
> +		unsigned int i;
> +
> +		pr_warn("Possible CPU mask exceeds available cores, reducing to %u\n",
> +			ncores);
> +		for (i = ncores - 1; i < max_cpus; i++)
> +			set_cpu_present(i, false);
> +		max_cpus = ncores;
> +	}
> +
> +	if (max_cpus > 1) {
> +		/* nobody is to be released from the pen yet */
> +		pen_release = -1;
> +
> +		/* Initialise the SCU */
> +		scu_enable(scu_base);
> +
> +		/* Let CPUs know where to start */
> +		bcm5301x_smp_secondary_set_entry(bcm5301x_secondary_startup);
> +	}
> +
> +	iounmap(scu_base);
> +}
> +
> +static void __cpuinit bcm5301x_smp_secondary_init(unsigned int cpu)
> +{
> +	trace_hardirqs_off();
> +
> +	/*
> +	 * let the primary processor know we're out of the
> +	 * pen, then head off into the C entry point
> +	 */
> +	write_pen_release(-1);
> +
> +	/*
> +	 * Synchronise with the boot thread.
> +	 */
> +	spin_lock(&boot_lock);
> +	spin_unlock(&boot_lock);
> +}
> +
> +static int __cpuinit bcm5301x_smp_boot_secondary(unsigned int cpu,
> +						 struct task_struct *idle)
> +{
> +	unsigned long timeout;
> +
> +	/*
> +	 * set synchronisation state between this boot processor
> +	 * and the secondary one
> +	 */
> +	spin_lock(&boot_lock);
> +
> +	/*
> +	 * The secondary processor is waiting to be released from
> +	 * the holding pen - release it, then wait for it to flag
> +	 * that it has been released by resetting pen_release.
> +	 *
> +	 * Note that "pen_release" is the hardware CPU ID, whereas
> +	 * "cpu" is Linux's internal ID.
> +	 */
> +	write_pen_release(cpu);
> +
> +	dsb_sev();
> +
> +	/*
> +	 * Timeout set on purpose in jiffies so that on slow processors
> +	 * that must also have low HZ it will wait longer.
> +	 */
> +	timeout = jiffies + (HZ * 10);
> +
> +	udelay(100);
> +
> +	/*
> +	 * If the secondary CPU was waiting on WFE, it should
> +	 * be already watching <pen_release>, or it could be
> +	 * waiting in WFI, send it an IPI to be sure it wakes.
> +	 */
> +	if (pen_release != -1)
> +		tick_broadcast(cpumask_of(cpu));
> +
> +	while (time_before(jiffies, timeout)) {
> +		smp_rmb();
> +		if (pen_release == -1)
> +			break;
> +
> +		udelay(10);
> +	}
> +
> +	/*
> +	 * now the secondary core is starting up let it run its
> +	 * calibrations, then wait for it to finish
> +	 */
> +	spin_unlock(&boot_lock);
> +
> +	return pen_release != -1 ? -ENOSYS : 0;
> +}
> +
> +static struct smp_operations bcm5301x_smp_ops __initdata = {
> +	.smp_prepare_cpus	= bcm5301x_smp_prepare_cpus,
> +	.smp_secondary_init	= bcm5301x_smp_secondary_init,
> +	.smp_boot_secondary	= bcm5301x_smp_boot_secondary,
> +};
> +
> +CPU_METHOD_OF_DECLARE(bcm5301x_smp, "brcm,bcm4708-smp",
> +		      &bcm5301x_smp_ops);
>
Mark Rutland Feb. 13, 2015, 12:29 p.m. UTC | #2
On Tue, Feb 10, 2015 at 08:32:55PM +0000, Rafał Miłecki wrote:
> Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
> ---
>  arch/arm/boot/dts/bcm4708.dtsi       |   1 +
>  arch/arm/mach-bcm/Makefile           |   3 +
>  arch/arm/mach-bcm/bcm5301x_headsmp.S | 108 ++++++++++++++++++++
>  arch/arm/mach-bcm/bcm5301x_smp.c     | 185 +++++++++++++++++++++++++++++++++++
>  4 files changed, 297 insertions(+)
>  create mode 100644 arch/arm/mach-bcm/bcm5301x_headsmp.S
>  create mode 100644 arch/arm/mach-bcm/bcm5301x_smp.c
> 
> diff --git a/arch/arm/boot/dts/bcm4708.dtsi b/arch/arm/boot/dts/bcm4708.dtsi
> index 31141e8..ed4ddba 100644
> --- a/arch/arm/boot/dts/bcm4708.dtsi
> +++ b/arch/arm/boot/dts/bcm4708.dtsi
> @@ -15,6 +15,7 @@
>         cpus {
>                 #address-cells = <1>;
>                 #size-cells = <0>;
> +               enable-method = "brcm,bcm4708-smp";

This must be documented.

We really should be getting to the point where we have a small number of
standard(ish) enable methods rather than just adding a load of new IMP
DEF methods with pointless differences.

> 
>                 cpu@0 {
>                         device_type = "cpu";
> diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile
> index 4c38674..ca12727 100644
> --- a/arch/arm/mach-bcm/Makefile
> +++ b/arch/arm/mach-bcm/Makefile
> @@ -36,6 +36,9 @@ obj-$(CONFIG_ARCH_BCM2835)    += board_bcm2835.o
> 
>  # BCM5301X
>  obj-$(CONFIG_ARCH_BCM_5301X)   += bcm_5301x.o
> +ifeq ($(CONFIG_SMP),y)
> +obj-$(CONFIG_ARCH_BCM_5301X)   += bcm5301x_smp.o bcm5301x_headsmp.o
> +endif
> 
>  # BCM63XXx
>  obj-$(CONFIG_ARCH_BCM_63XX)    := bcm63xx.o
> diff --git a/arch/arm/mach-bcm/bcm5301x_headsmp.S b/arch/arm/mach-bcm/bcm5301x_headsmp.S
> new file mode 100644
> index 0000000..e8df65f
> --- /dev/null
> +++ b/arch/arm/mach-bcm/bcm5301x_headsmp.S
> @@ -0,0 +1,108 @@
> +/*
> + * Broadcom BCM470X / BCM5301X ARM platform code.
> + *
> + * Copyright 2003 - 2008 Broadcom Corporation
> + *
> + * Licensed under the GNU/GPL. See COPYING for details.
> + */
> +
> +#include <asm/memory.h>
> +
> +#include <linux/linkage.h>
> +#include <linux/init.h>
> +
> +#define __virt_to_phys(x)      ((x) - PAGE_OFFSET)

This does not look like something there should be a custom
implementation of.

> +
> +/*
> + * v7_l1_cache_invalidate
> + *
> + * Invalidate contents of L1 cache without flushing its contents
> + * into outer cache and memory. This is needed when the contents
> + * of the cache are unpredictable after power-up.
> + *
> + * corrupts r0-r6
> + */
> +ENTRY(v7_l1_cache_invalidate)
> +       mov     r0, #0
> +       mcr     p15, 2, r0, c0, c0, 0   @ set cache level to 1
> +       mrc     p15, 1, r0, c0, c0, 0   @ read CLIDR

Isn't that the CCSIDR, not the CLIDR?

You need an ISB between CSSELR writes and CCSIDR reads.

> +
> +       ldr     r1, =0x7fff
> +       and     r2, r1, r0, lsr #13     @ get max # of index size
> +
> +       ldr     r1, =0x3ff
> +       and     r3, r1, r0, lsr #3      @ NumWays - 1
> +       add     r2, r2, #1              @ NumSets
> +
> +       and     r0, r0, #0x7
> +       add     r0, r0, #4              @ SetShift
> +
> +       clz     r1, r3                  @ WayShift
> +       add     r4, r3, #1              @ NumWays
> +1:     sub     r2, r2, #1              @ NumSets--
> +       mov     r3, r4                  @ Temp = NumWays
> +2:     subs    r3, r3, #1              @ Temp--
> +       mov     r5, r3, lsl r1
> +       mov     r6, r2, lsl r0
> +       orr     r5, r5, r6              @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
> +       mcr     p15, 0, r5, c7, c6, 2   @ Invalidate line
> +       bgt     2b
> +       cmp     r2, #0
> +       bgt     1b
> +       dsb
> +       mov     r0,#0
> +       mcr     p15,0,r0,c7,c5,0        /* Invalidate icache */
> +       isb

Surely you're missing a dsb after the i-cache maintenance?

> +       mov     pc, lr
> +ENDPROC(v7_l1_cache_invalidate)

This looks like a total mess. If you _really_ need this, factor it out
of the existing cache flush infrastructure. We don't need more broken
copies.

Do you have a guarantee that the CPU won't write back any of this
naturally before the invalidate is complete?

Is the CPU coherent at this point?

> +
> +/*
> + * Platform specific entry point for secondary CPUs.  This
> + * provides a "holding pen" into which all secondary cores are held
> + * until we're ready for them to initialise.
> + */
> +       __CPUINIT
> +ENTRY(bcm5301x_secondary_startup)
> +       /*
> +        * Get hardware CPU id of ours
> +        */
> +       mrc     p15, 0, r0, c0, c0, 5
> +       and     r0, r0, #15

Test all of the MPIDR.Aff* bits, please.

> +       /*
> +        * Wait on <pen_release> variable by physical address
> +        * to contain our hardware CPU id
> +        */
> +#ifdef CONFIG_SPARSEMEM
> +       ldr     r2, =(PAGE_OFFSET+SZ_128M)
> +       ldr     r1, =pen_release
> +       cmp     r1, r2
> +       bge     1f
> +       ldr     r2, =PAGE_OFFSET
> +       sub     r6, r1, r2
> +       b       2f
> +1:
> +       sub     r1, r1, r2
> +       ldr     r2, =PHYS_OFFSET2
> +       add     r6, r1, r2
> +2:

Huh? We really shouldn't have to care about SPARSEMEM in this kind of
code. I assume the fundamental issue here is your custom __virt_to_phys
implementation.

> +#else
> +       ldr     r6, =__virt_to_phys(pen_release)
> +#endif
> +pen:   ldr     r7, [r6]
> +       cmp     r7, r0
> +       bne     pen
> +       nop

Pointless nop?

> +       /*
> +        * In case L1 cache has unpredictable contents at power-up
> +        * clean its contents without flushing.
> +        */
> +       bl      v7_l1_cache_invalidate
> +       nop

Another pointless nop?

> +       /*
> +        * we've been released from the holding pen: secondary_stack
> +        * should now contain the SVC stack for this core
> +        */
> +       b       secondary_startup
> +
> +ENDPROC(bcm5301x_secondary_startup)
> +       .ltorg
> diff --git a/arch/arm/mach-bcm/bcm5301x_smp.c b/arch/arm/mach-bcm/bcm5301x_smp.c
> new file mode 100644
> index 0000000..1a173ec
> --- /dev/null
> +++ b/arch/arm/mach-bcm/bcm5301x_smp.c
> @@ -0,0 +1,185 @@
> +/*
> + * Broadcom BCM470X / BCM5301X ARM platform code.
> + *
> + * Copyright (C) 2002 ARM Ltd.
> + * Copyright (C) 2015 Rafał Miłecki <zajec5@gmail.com>
> + *
> + * Licensed under the GNU/GPL. See COPYING for details.
> + */
> +
> +#include <asm/cacheflush.h>
> +#include <asm/delay.h>
> +#include <asm/smp_scu.h>
> +
> +#include <linux/clockchips.h>
> +
> +/*
> + * There is a 1KB LUT located at 0xFFFF0400-0xFFFFFFFF, and its first entry
> + * is where the secondary entry point needs to be written
> +*/
> +#define SOC_ROM_BASE_PA                0xffff0000
> +#define SOC_ROM_LUT_OFF                0x400

We shouldn't be hard-coding physical addresses; those should come from
the DT.

> +
> +/* ENTRY in bcm5301x_headsmp.S */
> +extern void bcm5301x_secondary_startup(void);
> +
> +static DEFINE_SPINLOCK(boot_lock);
> +
> +static void __cpuinit write_pen_release(int val)
> +{
> +       pen_release = val;
> +       /* Make sure this store is visible to other CPUs */
> +       smp_wmb();
> +       __cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
> +       outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));

Surely we have some common infrastructure to perform this sort of
maintenance?

> +}
> +
> +static void __init bcm5301x_smp_secondary_set_entry(void (*entry_point)(void))
> +{
> +       void __iomem *rombase = NULL;
> +       phys_addr_t lut_pa;
> +       u32 offset, mask;
> +       u32 val;
> +
> +       mask = (1UL << PAGE_SHIFT) - 1;
> +
> +       lut_pa = SOC_ROM_BASE_PA & ~mask;
> +       offset = SOC_ROM_BASE_PA &  mask;
> +       offset += SOC_ROM_LUT_OFF;
> +
> +       rombase = ioremap(lut_pa, PAGE_SIZE);
> +       if (!rombase)
> +               return;
> +       val = virt_to_phys(entry_point);
> +
> +       writel(val, rombase + offset);
> +
> +       smp_wmb();      /* probably not needed - io regs are not cached */

Surely the following DSB is sufficient?

> +       dsb_sev();      /* Exit WFI */
> +       mb();

What's the mb for?

> +
> +       iounmap(rombase);
> +}
> +
> +static void __init bcm5301x_smp_prepare_cpus(unsigned int max_cpus)
> +{
> +       void __iomem *scu_base;
> +       unsigned int ncores;
> +
> +       if (!scu_a9_has_base()) {
> +               pr_warn("Unknown SCU base\n");
> +               return;
> +       }
> +
> +       scu_base = ioremap((phys_addr_t)scu_a9_get_base(), SZ_256);
> +       if (!scu_base) {
> +               pr_err("Failed to remap SCU\n");
> +               return;
> +       }
> +
> +       ncores = scu_get_core_count(scu_base);

Just read this from the DT as we do elsewhere.

> +       if (max_cpus > ncores) {
> +               unsigned int i;
> +
> +               pr_warn("Possible CPU mask exceeds available cores, reducing to %u\n",
> +                       ncores);
> +               for (i = ncores - 1; i < max_cpus; i++)
> +                       set_cpu_present(i, false);
> +               max_cpus = ncores;
> +       }
> +
> +       if (max_cpus > 1) {
> +               /* nobody is to be released from the pen yet */
> +               pen_release = -1;
> +
> +               /* Initialise the SCU */
> +               scu_enable(scu_base);
> +
> +               /* Let CPUs know where to start */
> +               bcm5301x_smp_secondary_set_entry(bcm5301x_secondary_startup);
> +       }
> +
> +       iounmap(scu_base);
> +}

[...]

> +static int __cpuinit bcm5301x_smp_boot_secondary(unsigned int cpu,
> +                                                struct task_struct *idle)
> +{
> +       unsigned long timeout;
> +
> +       /*
> +        * set synchronisation state between this boot processor
> +        * and the secondary one
> +        */
> +       spin_lock(&boot_lock);
> +
> +       /*
> +        * The secondary processor is waiting to be released from
> +        * the holding pen - release it, then wait for it to flag
> +        * that it has been released by resetting pen_release.
> +        *
> +        * Note that "pen_release" is the hardware CPU ID, whereas
> +        * "cpu" is Linux's internal ID.
> +        */
> +       write_pen_release(cpu);

As far as I can tell you're relying on the logical ID being equivalent
to MPIDR.Aff0, which isn't necessarily true. Either use the physical ID
or use the actual logical ID.

> +
> +       dsb_sev();
> +
> +       /*
> +        * Timeout set on purpose in jiffies so that on slow processors
> +        * that must also have low HZ it will wait longer.
> +        */
> +       timeout = jiffies + (HZ * 10);
> +
> +       udelay(100);
> +
> +       /*
> +        * If the secondary CPU was waiting on WFE, it should
> +        * be already watching <pen_release>, or it could be
> +        * waiting in WFI, send it an IPI to be sure it wakes.
> +        */
> +       if (pen_release != -1)
> +               tick_broadcast(cpumask_of(cpu));

NAK. This is not what tick_broadcast is intended for.

If you need an IPI then send an IPI, don't piggyback on the timekeeping
infrastructure.

Mark.
diff mbox

Patch

diff --git a/arch/arm/boot/dts/bcm4708.dtsi b/arch/arm/boot/dts/bcm4708.dtsi
index 31141e8..ed4ddba 100644
--- a/arch/arm/boot/dts/bcm4708.dtsi
+++ b/arch/arm/boot/dts/bcm4708.dtsi
@@ -15,6 +15,7 @@ 
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
+		enable-method = "brcm,bcm4708-smp";
 
 		cpu@0 {
 			device_type = "cpu";
diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile
index 4c38674..ca12727 100644
--- a/arch/arm/mach-bcm/Makefile
+++ b/arch/arm/mach-bcm/Makefile
@@ -36,6 +36,9 @@  obj-$(CONFIG_ARCH_BCM2835)	+= board_bcm2835.o
 
 # BCM5301X
 obj-$(CONFIG_ARCH_BCM_5301X)	+= bcm_5301x.o
+ifeq ($(CONFIG_SMP),y)
+obj-$(CONFIG_ARCH_BCM_5301X)	+= bcm5301x_smp.o bcm5301x_headsmp.o
+endif
 
 # BCM63XXx
 obj-$(CONFIG_ARCH_BCM_63XX)	:= bcm63xx.o
diff --git a/arch/arm/mach-bcm/bcm5301x_headsmp.S b/arch/arm/mach-bcm/bcm5301x_headsmp.S
new file mode 100644
index 0000000..e8df65f
--- /dev/null
+++ b/arch/arm/mach-bcm/bcm5301x_headsmp.S
@@ -0,0 +1,108 @@ 
+/*
+ * Broadcom BCM470X / BCM5301X ARM platform code.
+ *
+ * Copyright 2003 - 2008 Broadcom Corporation
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <asm/memory.h>
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#define __virt_to_phys(x)	((x) - PAGE_OFFSET)
+
+/*
+ * v7_l1_cache_invalidate
+ *
+ * Invalidate the L1 data cache by set/way, then the whole instruction
+ * cache, without writing any line back to outer cache or memory. Needed
+ * when the contents of the cache are unpredictable after power-up.
+ *
+ * corrupts r0-r6
+ */
+ENTRY(v7_l1_cache_invalidate)
+	mov	r0, #0
+	mcr	p15, 2, r0, c0, c0, 0	@ CSSELR = 0: select the level 1 data cache
+	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR (not CLIDR) of the selected cache
+
+	ldr	r1, =0x7fff
+	and	r2, r1, r0, lsr #13	@ NumSets - 1 (CCSIDR[27:13])
+
+	ldr	r1, =0x3ff
+	and	r3, r1, r0, lsr #3	@ NumWays - 1 (CCSIDR[12:3])
+	add	r2, r2, #1		@ NumSets
+
+	and	r0, r0, #0x7		@ LineSize field (CCSIDR[2:0])
+	add	r0, r0, #4		@ SetShift
+
+	clz	r1, r3			@ WayShift
+	add	r4, r3, #1		@ NumWays
+1:	sub	r2, r2, #1		@ NumSets--
+	mov	r3, r4			@ Temp = NumWays
+2:	subs	r3, r3, #1		@ Temp--
+	mov	r5, r3, lsl r1
+	mov	r6, r2, lsl r0
+	orr	r5, r5, r6		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+	mcr	p15, 0, r5, c7, c6, 2	@ DCISW: invalidate D-cache line by set/way
+	bgt	2b			@ flags still from the subs above
+	cmp	r2, #0
+	bgt	1b
+	dsb
+	mov	r0,#0
+	mcr	p15,0,r0,c7,c5,0	/* ICIALLU: invalidate entire icache */
+	isb
+	mov	pc, lr
+ENDPROC(v7_l1_cache_invalidate)
+
+/*
+ * Platform specific entry point for secondary CPUs.  This
+ * provides a "holding pen" into which all secondary cores are held
+ * until we're ready for them to initialise.
+ */
+	__CPUINIT
+ENTRY(bcm5301x_secondary_startup)
+	/*
+	 * Get the hardware CPU id of this core
+	 */
+	mrc	p15, 0, r0, c0, c0, 5	@ read MPIDR
+	and	r0, r0, #15		@ keep only MPIDR[3:0]
+	/*
+	 * Wait on <pen_release> variable by physical address
+	 * to contain our hardware CPU id
+	 */
+#ifdef CONFIG_SPARSEMEM
+	ldr	r2, =(PAGE_OFFSET+SZ_128M)
+	ldr	r1, =pen_release
+	cmp	r1, r2
+	bge	1f
+	ldr	r2, =PAGE_OFFSET
+	sub	r6, r1, r2
+	b	2f
+1:
+	sub	r1, r1, r2
+	ldr	r2, =PHYS_OFFSET2
+	add	r6, r1, r2
+2:
+#else
+	ldr	r6, =__virt_to_phys(pen_release)
+#endif
+pen:	ldr	r7, [r6]		@ r6 = address of pen_release computed above
+	cmp	r7, r0
+	bne	pen			@ spin until pen_release == our CPU id
+	nop
+	/*
+	 * In case L1 cache has unpredictable contents at power-up
+	 * invalidate its contents without writing them back.
+	 */
+	bl	v7_l1_cache_invalidate
+	nop
+	/*
+	 * we've been released from the holding pen: secondary_stack
+	 * should now contain the SVC stack for this core
+	 */
+	b	secondary_startup
+
+ENDPROC(bcm5301x_secondary_startup)
+	.ltorg
diff --git a/arch/arm/mach-bcm/bcm5301x_smp.c b/arch/arm/mach-bcm/bcm5301x_smp.c
new file mode 100644
index 0000000..1a173ec
--- /dev/null
+++ b/arch/arm/mach-bcm/bcm5301x_smp.c
@@ -0,0 +1,185 @@ 
+/*
+ * Broadcom BCM470X / BCM5301X ARM platform code.
+ *
+ * Copyright (C) 2002 ARM Ltd.
+ * Copyright (C) 2015 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <asm/cacheflush.h>
+#include <asm/delay.h>
+#include <asm/smp_scu.h>
+
+#include <linux/clockchips.h>
+
+/*
+ * There is a 1KB LUT located at 0xFFFF0400-0xFFFFFFFF, and its first entry
+ * is where the secondary entry point needs to be written
+*/
+#define SOC_ROM_BASE_PA		0xffff0000
+#define SOC_ROM_LUT_OFF		0x400
+
+/* ENTRY in bcm5301x_headsmp.S */
+extern void bcm5301x_secondary_startup(void);
+
+static DEFINE_SPINLOCK(boot_lock);
+
+static void __cpuinit write_pen_release(int val)
+{
+	pen_release = val;
+	/* Publish the store: barrier, then flush/clean the line holding pen_release */
+	smp_wmb();
+	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
+	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+}
+
+static void __init bcm5301x_smp_secondary_set_entry(void (*entry_point)(void))
+{
+	void __iomem *rombase = NULL;
+	phys_addr_t lut_pa;
+	u32 offset, mask;
+	u32 val;
+
+	mask = (1UL << PAGE_SHIFT) - 1;	/* in-page offset bits */
+
+	lut_pa = SOC_ROM_BASE_PA & ~mask;	/* page-aligned base to map */
+	offset = SOC_ROM_BASE_PA &  mask;	/* ROM base offset within that page */
+	offset += SOC_ROM_LUT_OFF;		/* first LUT entry */
+
+	rombase = ioremap(lut_pa, PAGE_SIZE);
+	if (!rombase)
+		return;	/* NOTE(review): mapping failure is silent, entry is never set */
+	val = virt_to_phys(entry_point);	/* physical address of the entry point */
+
+	writel(val, rombase + offset);
+
+	smp_wmb();	/* probably not needed - io regs are not cached */
+	dsb_sev();	/* NOTE(review): SEV wakes WFE, not WFI - confirm ROM wait state */
+	mb();
+
+	iounmap(rombase);
+}
+
+static void __init bcm5301x_smp_prepare_cpus(unsigned int max_cpus)
+{
+	void __iomem *scu_base;
+	unsigned int ncores;
+
+	if (!scu_a9_has_base()) {
+		pr_warn("Unknown SCU base\n");
+		return;
+	}
+
+	scu_base = ioremap((phys_addr_t)scu_a9_get_base(), SZ_256);
+	if (!scu_base) {
+		pr_err("Failed to remap SCU\n");
+		return;
+	}
+
+	ncores = scu_get_core_count(scu_base);	/* core count reported by the SCU */
+	if (max_cpus > ncores) {
+		unsigned int i;
+
+		pr_warn("Possible CPU mask exceeds available cores, reducing to %u\n",
+			ncores);
+		for (i = ncores; i < max_cpus; i++)	/* CPUs 0..ncores-1 stay present */
+			set_cpu_present(i, false);
+		max_cpus = ncores;
+	}
+
+	if (max_cpus > 1) {
+		/* nobody is to be released from the pen yet */
+		pen_release = -1;
+
+		/* Initialise the SCU */
+		scu_enable(scu_base);
+
+		/* Let CPUs know where to start */
+		bcm5301x_smp_secondary_set_entry(bcm5301x_secondary_startup);
+	}
+
+	iounmap(scu_base);	/* mapping only needed during this setup */
+}
+
+static void __cpuinit bcm5301x_smp_secondary_init(unsigned int cpu)
+{
+	trace_hardirqs_off();
+
+	/*
+	 * let the primary processor know we're out of the
+	 * pen (it polls for pen_release == -1), then carry on
+	 */
+	write_pen_release(-1);
+
+	/*
+	 * Synchronise with the boot thread, which holds boot_lock while it waits.
+	 */
+	spin_lock(&boot_lock);
+	spin_unlock(&boot_lock);
+}
+
+static int __cpuinit bcm5301x_smp_boot_secondary(unsigned int cpu,
+						 struct task_struct *idle)
+{
+	unsigned long timeout;
+
+	/*
+	 * set synchronisation state between this boot processor
+	 * and the secondary one
+	 */
+	spin_lock(&boot_lock);
+
+	/*
+	 * The secondary processor is waiting to be released from
+	 * the holding pen - release it, then wait for it to flag
+	 * that it has been released by resetting pen_release.
+	 *
+	 * NOTE(review): the pen compares "pen_release" with the hardware CPU ID,
+	 * but "cpu" (Linux's logical ID) is written unmapped - assumes they match.
+	 */
+	write_pen_release(cpu);
+
+	dsb_sev();
+
+	/*
+	 * Timeout set on purpose in jiffies so that on slow processors
+	 * that must also have low HZ it will wait longer.
+	 */
+	timeout = jiffies + (HZ * 10);
+
+	udelay(100);
+
+	/*
+	 * If the secondary CPU was waiting on WFE, it should
+	 * be already watching <pen_release>, or it could be
+	 * waiting in WFI, send it an IPI to be sure it wakes.
+	 */
+	if (pen_release != -1)
+		tick_broadcast(cpumask_of(cpu));
+
+	while (time_before(jiffies, timeout)) {
+		smp_rmb();
+		if (pen_release == -1)	/* secondary acknowledged the release */
+			break;
+
+		udelay(10);
+	}
+
+	/*
+	 * now the secondary core is starting up let it run its
+	 * calibrations, then wait for it to finish
+	 */
+	spin_unlock(&boot_lock);
+
+	return pen_release != -1 ? -ENOSYS : 0;	/* not acknowledged in time => error */
+}
+
+static struct smp_operations bcm5301x_smp_ops __initdata = {
+	.smp_prepare_cpus	= bcm5301x_smp_prepare_cpus,
+	.smp_secondary_init	= bcm5301x_smp_secondary_init,
+	.smp_boot_secondary	= bcm5301x_smp_boot_secondary,
+};
+
+CPU_METHOD_OF_DECLARE(bcm5301x_smp, "brcm,bcm4708-smp",	/* cpus enable-method from bcm4708.dtsi */
+		      &bcm5301x_smp_ops);