Patchwork [U-Boot,3/8] armv7: integrate cache maintenance support

login
register
mail settings
Submitter Aneesh V
Date Dec. 22, 2010, 11:54 a.m.
Message ID <1293018898-13253-4-git-send-email-aneesh@ti.com>
Download mbox | patch
Permalink /patch/76394/
State Changes Requested
Delegated to: Albert ARIBAUD
Headers show

Comments

Aneesh V - Dec. 22, 2010, 11:54 a.m.
- Enable I-cache on bootup
- Enable MMU and D-cache immediately after relocation
	- Do necessary initialization before enabling d-cache and MMU
- Changes to cleanup_before_linux()
	- Make changes according to the new framework

Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/cpu/armv7/cpu.c   |   47 +++++++++++++++++++------------------------
 arch/arm/cpu/armv7/start.S |   18 +++++++++++++++-
 arch/arm/lib/board.c       |    6 +++++
 arch/arm/lib/cache-cp15.c  |    9 ++++++++
 4 files changed, 53 insertions(+), 27 deletions(-)
Albert ARIBAUD - Jan. 8, 2011, 6:54 a.m.
Hi Aneesh,

Le 22/12/2010 12:54, Aneesh V a écrit :
> - Enable I-cache on bootup
> - Enable MMU and D-cache immediately after relocation
> 	- Do necessary initialization before enabling d-cache and MMU
> - Changes to cleanup_before_linux()
> 	- Make changes according to the new framework
>
> Signed-off-by: Aneesh V<aneesh@ti.com>
> ---
>   arch/arm/cpu/armv7/cpu.c   |   47 +++++++++++++++++++------------------------
>   arch/arm/cpu/armv7/start.S |   18 +++++++++++++++-
>   arch/arm/lib/board.c       |    6 +++++
>   arch/arm/lib/cache-cp15.c  |    9 ++++++++
>   4 files changed, 53 insertions(+), 27 deletions(-)
>
> diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c
> index a01e0d6..b418304 100644
> --- a/arch/arm/cpu/armv7/cpu.c
> +++ b/arch/arm/cpu/armv7/cpu.c
> @@ -38,13 +38,10 @@
>   #ifndef CONFIG_L2_OFF
>   #include<asm/arch/sys_proto.h>
>   #endif
> -
> -static void cache_flush(void);
> +#include<asm/armv7.h>
>
>   int cleanup_before_linux(void)
>   {
> -	unsigned int i;
> -
>   	/*
>   	 * this function is called just before we call linux
>   	 * it prepares the processor for linux
> @@ -53,31 +50,29 @@ int cleanup_before_linux(void)
>   	 */
>   	disable_interrupts();
>
> -	/* turn off I/D-cache */
> +	/*
> +	 * Turn off I-cache and invalidate it
> +	 */
>   	icache_disable();
> -	dcache_disable();

(1) -- see below

> +	invalidate_icache_all();
>
> -	/* invalidate I-cache */
> -	cache_flush();

(2) -- see below

> -
> -#ifndef CONFIG_L2_OFF
> -	/* turn off L2 cache */
> -	l2_cache_disable();
> -	/* invalidate L2 cache also */
> -	invalidate_dcache(get_device_type());
> -#endif
> -	i = 0;
> -	/* mem barrier to sync up things */
> -	asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i));
> +	/*
> +	 * turn off D-cache
> +	 * dcache_disable() in turn flushes the d-cache and disables MMU
> +	 */
> +	dcache_disable();

(3) -- see below

> -#ifndef CONFIG_L2_OFF
> -	l2_cache_enable();
> -#endif
> +	/*
> +	 * After D-cache is flushed and before it is disabled there may
> +	 * be some new valid entries brought into the cache. We are sure
> +	 * that these lines are not dirty and will not affect our execution.
> +	 * (because unwinding the call-stack and setting a bit in CP15 SCTRL
> +	 * is all we did during this. We have not pushed anything on to the
> +	 * stack. Neither have we affected any static data)
> +	 * So just invalidate the entire d-cache again to avoid coherency
> +	 * problems for kernel
> +	 */
> +	invalidate_dcache_all();

I'm not sure about the logic here, but I am no cache guru, so don't 
hesitate to make me... flush... with shame. If dcache_disable stayed in 
(1) instead of being moved to (3), wouldn't the cache_flush in (2) 
ensure the no new valid entries would appear in the dcache?
  Put another way, I'd have naively thought the sequence would be:

- disable L2 cache
- disable L1 I and D cache
- invalidate L1 I cache and flush L1 D cache
- flush L2 D cache

>   	return 0;
>   }
> -
> -static void cache_flush(void)
> -{
> -	asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0));
> -}
> diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
> index 684f2d2..7d17f1e 100644
> --- a/arch/arm/cpu/armv7/start.S
> +++ b/arch/arm/cpu/armv7/start.S
> @@ -241,6 +241,14 @@ clbss_l:str	r2, [r0]		/* clear loop...		    */
>    * initialization, now running from RAM.
>    */
>   jump_2_ram:
> +/*
> + * If I-cache is enabled invalidate it
> + */
> +#ifndef CONFIG_SYS_NO_ICACHE
> +	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
> +	dsb
> +	isb
> +#endif
>   	ldr	r0, _board_init_r_ofs
>   	adr	r1, _start
>   	add	lr, r0, r1
> @@ -276,6 +284,9 @@ cpu_init_crit:
>   	mov	r0, #0			@ set up for MCR
>   	mcr	p15, 0, r0, c8, c7, 0	@ invalidate TLBs
>   	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
> +	mcr	p15, 0, r0, c7, c5, 6  /* invalidate BP array */
> +	dsb
> +	isb
>
>   	/*
>   	 * disable MMU stuff and caches
> @@ -284,7 +295,12 @@ cpu_init_crit:
>   	bic	r0, r0, #0x00002000	@ clear bits 13 (--V-)
>   	bic	r0, r0, #0x00000007	@ clear bits 2:0 (-CAM)
>   	orr	r0, r0, #0x00000002	@ set bit 1 (--A-) Align
> -	orr	r0, r0, #0x00000800	@ set bit 12 (Z---) BTB
> +	orr	r0, r0, #0x00000800	@ set bit 11 (Z---) BTB
> +#ifdef CONFIG_SYS_NO_ICACHE
> +	bic	r0, r0, #0x00001000	@ clear bit 12 (I) I-cache
> +#else
> +	orr	r0, r0, #0x00001000	@ set bit 12 (I) I-cache
> +#endif
>   	mcr	p15, 0, r0, c1, c0, 0
>
>   	/*
> diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
> index 7266381..bef32a6 100644
> --- a/arch/arm/lib/board.c
> +++ b/arch/arm/lib/board.c
> @@ -459,6 +459,12 @@ void board_init_r (gd_t *id, ulong dest_addr)
>
>   	gd->flags |= GD_FLG_RELOC;	/* tell others: relocation done */
>
> +	/*
> +	 * Enable D$:
> +	 * I$, if needed, must be already enabled in start.S
> +	 */
> +	dcache_enable();
> +
>   	monitor_flash_len = _bss_start_ofs;
>   	debug ("monitor flash len: %08lX\n", monitor_flash_len);
>   	board_init();	/* Setup chipselects */
> diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
> index d9175f0..ca526fb 100644
> --- a/arch/arm/lib/cache-cp15.c
> +++ b/arch/arm/lib/cache-cp15.c
> @@ -34,6 +34,14 @@
>
>   DECLARE_GLOBAL_DATA_PTR;
>
> +void __arm_init_before_mmu(void)
> +{
> +	puts("__arm_init_before_mmu: dummy implementation! "
> +	     "real implementation missing!!\n");
> +}

If the function is absolutely needed, please make sure booting does not 
continue. If the function is not absolutely needed, please rewrite 
message since nothing would actually be 'missing', but rather 'could be 
added'.

> +void arm_init_before_mmu(void)
> +	__attribute__((weak, alias("__arm_init_before_mmu")));
> +
>   static void cp_delay (void)
>   {
>   	volatile int i;
> @@ -65,6 +73,7 @@ static inline void mmu_setup(void)
>   	int i;
>   	u32 reg;
>
> +	arm_init_before_mmu();
>   	/* Set up an identity-mapping for all 4GB, rw for everyone */
>   	for (i = 0; i<  4096; i++)
>   		page_table[i] = i<<  20 | (3<<  10) | 0x12;

Amicalement,
Aneesh V - Jan. 8, 2011, 8:15 a.m.
Hi Albert,

On Saturday 08 January 2011 12:24 PM, Albert ARIBAUD wrote:
> Hi Aneesh,
>
> Le 22/12/2010 12:54, Aneesh V a écrit :
>> - Enable I-cache on bootup
>> - Enable MMU and D-cache immediately after relocation
>> 	- Do necessary initialization before enabling d-cache and MMU
>> - Changes to cleanup_before_linux()
>> 	- Make changes according to the new framework
>>
>> Signed-off-by: Aneesh V<aneesh@ti.com>
>> ---
>>    arch/arm/cpu/armv7/cpu.c   |   47 +++++++++++++++++++------------------------
>>    arch/arm/cpu/armv7/start.S |   18 +++++++++++++++-
>>    arch/arm/lib/board.c       |    6 +++++
>>    arch/arm/lib/cache-cp15.c  |    9 ++++++++
>>    4 files changed, 53 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c
>> index a01e0d6..b418304 100644
>> --- a/arch/arm/cpu/armv7/cpu.c
>> +++ b/arch/arm/cpu/armv7/cpu.c
>> @@ -38,13 +38,10 @@
>>    #ifndef CONFIG_L2_OFF
>>    #include<asm/arch/sys_proto.h>
>>    #endif
>> -
>> -static void cache_flush(void);
>> +#include<asm/armv7.h>
>>
>>    int cleanup_before_linux(void)
>>    {
>> -	unsigned int i;
>> -
>>    	/*
>>    	 * this function is called just before we call linux
>>    	 * it prepares the processor for linux
>> @@ -53,31 +50,29 @@ int cleanup_before_linux(void)
>>    	 */
>>    	disable_interrupts();
>>
>> -	/* turn off I/D-cache */
>> +	/*
>> +	 * Turn off I-cache and invalidate it
>> +	 */
>>    	icache_disable();
>> -	dcache_disable();
>
> (1) -- see below
>
>> +	invalidate_icache_all();
>>
>> -	/* invalidate I-cache */
>> -	cache_flush();
>
> (2) -- see below
>
>> -
>> -#ifndef CONFIG_L2_OFF
>> -	/* turn off L2 cache */
>> -	l2_cache_disable();
>> -	/* invalidate L2 cache also */
>> -	invalidate_dcache(get_device_type());
>> -#endif
>> -	i = 0;
>> -	/* mem barrier to sync up things */
>> -	asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i));
>> +	/*
>> +	 * turn off D-cache
>> +	 * dcache_disable() in turn flushes the d-cache and disables MMU
>> +	 */
>> +	dcache_disable();
>
> (3) -- see below
>
>> -#ifndef CONFIG_L2_OFF
>> -	l2_cache_enable();
>> -#endif
>> +	/*
>> +	 * After D-cache is flushed and before it is disabled there may
>> +	 * be some new valid entries brought into the cache. We are sure
>> +	 * that these lines are not dirty and will not affect our execution.
>> +	 * (because unwinding the call-stack and setting a bit in CP15 SCTRL
>> +	 * is all we did during this. We have not pushed anything on to the
>> +	 * stack. Neither have we affected any static data)
>> +	 * So just invalidate the entire d-cache again to avoid coherency
>> +	 * problems for kernel
>> +	 */
>> +	invalidate_dcache_all();
>
> I'm not sure about the logic here, but I am no cache guru, so don't
> hesitate to make me... flush... with shame. If dcache_disable stayed in
> (1) instead of being moved to (3), wouldn't the cache_flush in (2)
> ensure the no new valid entries would appear in the dcache?
>    Put another way, I'd have naively thought the sequence would be:
>
> - disable L2 cache
> - disable L1 I and D cache
> - invalidate L1 I cache and flush L1 D cache
> - flush L2 D cache

This is what I did too in the beginning. But I ran into problems with
it. Here is how:

1 - disable L2 cache
2 - disable L1 I and D cache
3 - invalidate L1 I cache and flush L1 D cache
4 - flush L2 D cache

- At step 3 and 4 we call the functions to flush the call. These
functions in their prologue push some registers(including r14) to the
stack.
- Please note that the cache is disabled at this point in time. So
the new stack contents go directly into the memory.
- Also, note that the stack may be already cached in L1 or L2(before
the caches were disabled). So we have a coherency issue here.
- Now when the caches are flushed the stack in the cache over-writes
the stack in memory thus losing the latest stack contents.
- When we try to return from the function there is a crash!

Conclusion:
1. If you disable first and then flush you have to be careful about
memory coherency in the period between disable and flush. You must not
write anything to the memory and you must be sure about what you read
from memory. This is difficult unless you program the entire thing in
assembly.
2. If you flush first and then disable you have to take care of the new
entries coming to the cache after the flush and before disabling. This
is what we are doing.

>
>>    	return 0;
>>    }
>> -
>> -static void cache_flush(void)
>> -{
>> -	asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0));
>> -}
>> diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
>> index 684f2d2..7d17f1e 100644
>> --- a/arch/arm/cpu/armv7/start.S
>> +++ b/arch/arm/cpu/armv7/start.S
>> @@ -241,6 +241,14 @@ clbss_l:str	r2, [r0]		/* clear loop...		    */
>>     * initialization, now running from RAM.
>>     */
>>    jump_2_ram:
>> +/*
>> + * If I-cache is enabled invalidate it
>> + */
>> +#ifndef CONFIG_SYS_NO_ICACHE
>> +	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
>> +	dsb
>> +	isb
>> +#endif
>>    	ldr	r0, _board_init_r_ofs
>>    	adr	r1, _start
>>    	add	lr, r0, r1
>> @@ -276,6 +284,9 @@ cpu_init_crit:
>>    	mov	r0, #0			@ set up for MCR
>>    	mcr	p15, 0, r0, c8, c7, 0	@ invalidate TLBs
>>    	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
>> +	mcr	p15, 0, r0, c7, c5, 6  /* invalidate BP array */
>> +	dsb
>> +	isb
>>
>>    	/*
>>    	 * disable MMU stuff and caches
>> @@ -284,7 +295,12 @@ cpu_init_crit:
>>    	bic	r0, r0, #0x00002000	@ clear bits 13 (--V-)
>>    	bic	r0, r0, #0x00000007	@ clear bits 2:0 (-CAM)
>>    	orr	r0, r0, #0x00000002	@ set bit 1 (--A-) Align
>> -	orr	r0, r0, #0x00000800	@ set bit 12 (Z---) BTB
>> +	orr	r0, r0, #0x00000800	@ set bit 11 (Z---) BTB
>> +#ifdef CONFIG_SYS_NO_ICACHE
>> +	bic	r0, r0, #0x00001000	@ clear bit 12 (I) I-cache
>> +#else
>> +	orr	r0, r0, #0x00001000	@ set bit 12 (I) I-cache
>> +#endif
>>    	mcr	p15, 0, r0, c1, c0, 0
>>
>>    	/*
>> diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
>> index 7266381..bef32a6 100644
>> --- a/arch/arm/lib/board.c
>> +++ b/arch/arm/lib/board.c
>> @@ -459,6 +459,12 @@ void board_init_r (gd_t *id, ulong dest_addr)
>>
>>    	gd->flags |= GD_FLG_RELOC;	/* tell others: relocation done */
>>
>> +	/*
>> +	 * Enable D$:
>> +	 * I$, if needed, must be already enabled in start.S
>> +	 */
>> +	dcache_enable();
>> +
>>    	monitor_flash_len = _bss_start_ofs;
>>    	debug ("monitor flash len: %08lX\n", monitor_flash_len);
>>    	board_init();	/* Setup chipselects */
>> diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
>> index d9175f0..ca526fb 100644
>> --- a/arch/arm/lib/cache-cp15.c
>> +++ b/arch/arm/lib/cache-cp15.c
>> @@ -34,6 +34,14 @@
>>
>>    DECLARE_GLOBAL_DATA_PTR;
>>
>> +void __arm_init_before_mmu(void)
>> +{
>> +	puts("__arm_init_before_mmu: dummy implementation! "
>> +	     "real implementation missing!!\n");
>> +}
>
> If the function is absolutely needed, please make sure booting does not
> continue. If the function is not absolutely needed, please rewrite
> message since nothing would actually be 'missing', but rather 'could be
> added'.
>

Before ARMv7 caches are guaranteed to be invalidated at reset. But I
couldn't find conclusive information about TLB. So I am not sure, it
may or may not be absolutely necessary.

I shall just remove that message!

>> +void arm_init_before_mmu(void)
>> +	__attribute__((weak, alias("__arm_init_before_mmu")));
>> +
>>    static void cp_delay (void)
>>    {
>>    	volatile int i;
>> @@ -65,6 +73,7 @@ static inline void mmu_setup(void)
>>    	int i;
>>    	u32 reg;
>>
>> +	arm_init_before_mmu();
>>    	/* Set up an identity-mapping for all 4GB, rw for everyone */
>>    	for (i = 0; i<   4096; i++)
>>    		page_table[i] = i<<   20 | (3<<   10) | 0x12;
>
> Amicalement,

Patch

diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c
index a01e0d6..b418304 100644
--- a/arch/arm/cpu/armv7/cpu.c
+++ b/arch/arm/cpu/armv7/cpu.c
@@ -38,13 +38,10 @@ 
 #ifndef CONFIG_L2_OFF
 #include <asm/arch/sys_proto.h>
 #endif
-
-static void cache_flush(void);
+#include <asm/armv7.h>
 
 int cleanup_before_linux(void)
 {
-	unsigned int i;
-
 	/*
 	 * this function is called just before we call linux
 	 * it prepares the processor for linux
@@ -53,31 +50,29 @@  int cleanup_before_linux(void)
 	 */
 	disable_interrupts();
 
-	/* turn off I/D-cache */
+	/*
+	 * Turn off I-cache and invalidate it
+	 */
 	icache_disable();
-	dcache_disable();
+	invalidate_icache_all();
 
-	/* invalidate I-cache */
-	cache_flush();
-
-#ifndef CONFIG_L2_OFF
-	/* turn off L2 cache */
-	l2_cache_disable();
-	/* invalidate L2 cache also */
-	invalidate_dcache(get_device_type());
-#endif
-	i = 0;
-	/* mem barrier to sync up things */
-	asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i));
+	/*
+	 * turn off D-cache
+	 * dcache_disable() in turn flushes the d-cache and disables MMU
+	 */
+	dcache_disable();
 
-#ifndef CONFIG_L2_OFF
-	l2_cache_enable();
-#endif
+	/*
+	 * After D-cache is flushed and before it is disabled there may
+	 * be some new valid entries brought into the cache. We are sure
+	 * that these lines are not dirty and will not affect our execution.
+	 * (because unwinding the call-stack and setting a bit in CP15 SCTRL
+	 * is all we did during this. We have not pushed anything on to the
+	 * stack. Neither have we affected any static data) 
+	 * So just invalidate the entire d-cache again to avoid coherency
+	 * problems for kernel
+	 */
+	invalidate_dcache_all();
 
 	return 0;
 }
-
-static void cache_flush(void)
-{
-	asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0));
-}
diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index 684f2d2..7d17f1e 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -241,6 +241,14 @@  clbss_l:str	r2, [r0]		/* clear loop...		    */
  * initialization, now running from RAM.
  */
 jump_2_ram:
+/*
+ * If I-cache is enabled invalidate it
+ */
+#ifndef CONFIG_SYS_NO_ICACHE
+	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
+	dsb
+	isb
+#endif
 	ldr	r0, _board_init_r_ofs
 	adr	r1, _start
 	add	lr, r0, r1
@@ -276,6 +284,9 @@  cpu_init_crit:
 	mov	r0, #0			@ set up for MCR
 	mcr	p15, 0, r0, c8, c7, 0	@ invalidate TLBs
 	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
+	mcr	p15, 0, r0, c7, c5, 6  /* invalidate BP array */
+	dsb
+	isb
 
 	/*
 	 * disable MMU stuff and caches
@@ -284,7 +295,12 @@  cpu_init_crit:
 	bic	r0, r0, #0x00002000	@ clear bits 13 (--V-)
 	bic	r0, r0, #0x00000007	@ clear bits 2:0 (-CAM)
 	orr	r0, r0, #0x00000002	@ set bit 1 (--A-) Align
-	orr	r0, r0, #0x00000800	@ set bit 12 (Z---) BTB
+	orr	r0, r0, #0x00000800	@ set bit 11 (Z---) BTB
+#ifdef CONFIG_SYS_NO_ICACHE
+	bic	r0, r0, #0x00001000	@ clear bit 12 (I) I-cache
+#else
+	orr	r0, r0, #0x00001000	@ set bit 12 (I) I-cache
+#endif
 	mcr	p15, 0, r0, c1, c0, 0
 
 	/*
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
index 7266381..bef32a6 100644
--- a/arch/arm/lib/board.c
+++ b/arch/arm/lib/board.c
@@ -459,6 +459,12 @@  void board_init_r (gd_t *id, ulong dest_addr)
 
 	gd->flags |= GD_FLG_RELOC;	/* tell others: relocation done */
 
+	/*
+	 * Enable D$:
+	 * I$, if needed, must be already enabled in start.S
+	 */
+	dcache_enable();
+
 	monitor_flash_len = _bss_start_ofs;
 	debug ("monitor flash len: %08lX\n", monitor_flash_len);
 	board_init();	/* Setup chipselects */
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
index d9175f0..ca526fb 100644
--- a/arch/arm/lib/cache-cp15.c
+++ b/arch/arm/lib/cache-cp15.c
@@ -34,6 +34,14 @@ 
 
 DECLARE_GLOBAL_DATA_PTR;
 
+void __arm_init_before_mmu(void)
+{
+	puts("__arm_init_before_mmu: dummy implementation! "
+	     "real implementation missing!!\n");
+}
+void arm_init_before_mmu(void)
+	__attribute__((weak, alias("__arm_init_before_mmu")));
+
 static void cp_delay (void)
 {
 	volatile int i;
@@ -65,6 +73,7 @@  static inline void mmu_setup(void)
 	int i;
 	u32 reg;
 
+	arm_init_before_mmu();
 	/* Set up an identity-mapping for all 4GB, rw for everyone */
 	for (i = 0; i < 4096; i++)
 		page_table[i] = i << 20 | (3 << 10) | 0x12;