[U-Boot,v2,1/3] Revert "arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code"

Message ID 1469198413-1157-2-git-send-email-apatterson@sightlogix.com
State Rejected
Delegated to: Tom Rini

Commit Message

Sandy Patterson July 22, 2016, 2:40 p.m. UTC
Commits c09d29057a and df120142f36 break kernel loading on the rock2 board.

console output:
Starting kernel ...

### ERROR ### Please RESET the board ###

This reverts commit c09d29057ab0b04db0857d319c6bff74de31b9c3.

Conflicts:
	arch/arm/cpu/armv7/cache_v7.c
	arch/arm/cpu/armv7/cache_v7_asm.S

Signed-off-by: Sandy Patterson <apatterson@sightlogix.com>
---

 arch/arm/cpu/armv7/Makefile                  |   2 +-
 arch/arm/cpu/armv7/cache_v7.c                | 114 +++++++++++++++++++-
 arch/arm/cpu/armv7/cache_v7_asm.S            | 154 ---------------------------
 arch/arm/mach-uniphier/arm32/lowlevel_init.S |  67 +++++++++++-
 4 files changed, 180 insertions(+), 157 deletions(-)
 delete mode 100644 arch/arm/cpu/armv7/cache_v7_asm.S

Comments

Tom Rini July 22, 2016, 8:39 p.m. UTC | #1
On Fri, Jul 22, 2016 at 10:40:11AM -0400, Sandy Patterson wrote:

> Commits c09d29057a and df120142f36 break kernel loading on the rock2 board.
> 
> console output:
> Starting kernel ...
> 
> ### ERROR ### Please RESET the board ###
> 
> This reverts commit c09d29057ab0b04db0857d319c6bff74de31b9c3.
> 
> Conflicts:
> 	arch/arm/cpu/armv7/cache_v7.c
> 	arch/arm/cpu/armv7/cache_v7_asm.S
> 
> Signed-off-by: Sandy Patterson <apatterson@sightlogix.com>

So, we cannot do this.  The problem is two-fold.  First, newer GCCs will
cause a failure to boot with the way the code was, which is how we found
this problem.  The second is that, after some offline conversations with
ARM Ltd people (and some other smart folks too), these operations cannot
safely be done in C; there's just no way.  So we borrowed what the
kernel does.  You need to figure out what behavior the (old?) kernel is
relying on and either emulate that with a flag (like sunxi does, or at
least did at one point, for some other issues of correct behavior vs.
existing kernels) or just not support those older kernels.
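
A minimal sketch of the flag idea mentioned above (every name here is
hypothetical, and the mechanism sunxi actually used may differ): let the
user opt into whatever extra cache handling an old kernel turns out to
expect, instead of reverting the default flush path for everyone.

	#include <common.h>

	/*
	 * Hypothetical helper a board could call right before handing
	 * control to the kernel.  "compat_cache" is a made-up environment
	 * variable; getenv() is the 2016-era U-Boot API (env_get() today).
	 */
	static void maybe_do_compat_cache_flush(void)
	{
		const char *flag = getenv("compat_cache");

		if (flag && *flag == '1') {
			flush_dcache_all();		/* clean + invalidate all levels */
			invalidate_icache_all();	/* drop stale I-cache contents */
		}
	}

This only shows the shape of such a workaround; what actually belongs
inside the if-block is exactly the open question in this thread.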
Sandy Patterson July 22, 2016, 8:40 p.m. UTC | #2
I see the problem with 4.7-rc6. Is there a different kernel I should test
with?

On Fri, Jul 22, 2016 at 4:39 PM, Tom Rini <trini@konsulko.com> wrote:

> On Fri, Jul 22, 2016 at 10:40:11AM -0400, Sandy Patterson wrote:
>
> > Commits c09d29057a and df120142f36 break kernel loading on the rock2 board.
> >
> > console output:
> > Starting kernel ...
> >
> > ### ERROR ### Please RESET the board ###
> >
> > This reverts commit c09d29057ab0b04db0857d319c6bff74de31b9c3.
> >
> > Conflicts:
> >       arch/arm/cpu/armv7/cache_v7.c
> >       arch/arm/cpu/armv7/cache_v7_asm.S
> >
> > Signed-off-by: Sandy Patterson <apatterson@sightlogix.com>
>
> So, we cannot do this.  The problem is two-fold.  First, newer GCCs will
> cause a failure to boot with the way the code was, which is how we found
> this problem.  The second is that, after some offline conversations with
> ARM Ltd people (and some other smart folks too), these operations cannot
> safely be done in C; there's just no way.  So we borrowed what the
> kernel does.  You need to figure out what behavior the (old?) kernel is
> relying on and either emulate that with a flag (like sunxi does, or at
> least did at one point, for some other issues of correct behavior vs.
> existing kernels) or just not support those older kernels.
>
> --
> Tom
>
Tom Rini July 22, 2016, 8:52 p.m. UTC | #3
On Fri, Jul 22, 2016 at 04:40:53PM -0400, Sandy Patterson wrote:

> I see the problem with 4.7-rc6. Is there a different kernel I should test
> with?

No, I was hoping (from some other threads I had skimmed) that this was a
problem only with an older 3.14.y kernel.  You need to figure out where
and why the hang is happening.
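
Before the patch itself, a worked example of the set/way encoding used by
the reverted C helpers below may help.  This is a host-side sketch, and it
assumes a 32 KB, 4-way, 32-byte-line L1 data cache; on real hardware the
geometry comes from CCSIDR and the resulting operand is fed to the MCR
instructions shown in the diff.

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t level = 0;		/* L1 data cache == level 0 */
		uint32_t log2_line_len = 5;	/* 32-byte lines */
		uint32_t num_sets = 256;	/* 32 KB / (4 ways * 32 bytes) */
		uint32_t num_ways = 4;
		uint32_t way_shift = 32 - 2;	/* 32 - log2(num_ways) */

		/* Operand for the last set/way of this level, as the loops build it */
		uint32_t setway = (level << 1) |
				  ((num_sets - 1) << log2_line_len) |
				  ((num_ways - 1) << way_shift);

		printf("setway = 0x%08x\n", (unsigned int)setway);	/* 0xc0001fe0 */
		return 0;
	}

The three fields land where DCCISW/DCISW expect them: the cache level in
bits [3:1], the set index starting at bit log2(line length in bytes), and
the way index in the top bits.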

Patch

diff --git a/arch/arm/cpu/armv7/Makefile b/arch/arm/cpu/armv7/Makefile
index 0d4bfbc..9d20f17 100644
--- a/arch/arm/cpu/armv7/Makefile
+++ b/arch/arm/cpu/armv7/Makefile
@@ -7,7 +7,7 @@ 
 
 extra-y	:= start.o
 
-obj-y	+= cache_v7.o cache_v7_asm.o
+obj-y	+= cache_v7.o
 
 obj-y	+= cpu.o cp15.o
 obj-y	+= syslib.o
diff --git a/arch/arm/cpu/armv7/cache_v7.c b/arch/arm/cpu/armv7/cache_v7.c
index 52f1856..9fbabb4 100644
--- a/arch/arm/cpu/armv7/cache_v7.c
+++ b/arch/arm/cpu/armv7/cache_v7.c
@@ -28,6 +28,118 @@  static u32 get_ccsidr(void)
 	return ccsidr;
 }
 
+static u32 get_clidr(void)
+{
+	u32 clidr;
+
+	/* Read current CP15 Cache Level ID Register */
+	asm volatile ("mrc p15,1,%0,c0,c0,1" : "=r" (clidr));
+	return clidr;
+}
+
+static void v7_inval_dcache_level_setway(u32 level, u32 num_sets,
+					 u32 num_ways, u32 way_shift,
+					 u32 log2_line_len)
+{
+	int way, set;
+	u32 setway;
+
+	/*
+	 * For optimal assembly code:
+	 *	a. count down
+	 *	b. have bigger loop inside
+	 */
+	for (way = num_ways - 1; way >= 0 ; way--) {
+		for (set = num_sets - 1; set >= 0; set--) {
+			setway = (level << 1) | (set << log2_line_len) |
+				 (way << way_shift);
+			/* Invalidate data/unified cache line by set/way */
+			asm volatile ("	mcr p15, 0, %0, c7, c6, 2"
+					: : "r" (setway));
+		}
+	}
+	/* DSB to make sure the operation is complete */
+	DSB;
+}
+
+static void v7_clean_inval_dcache_level_setway(u32 level, u32 num_sets,
+					       u32 num_ways, u32 way_shift,
+					       u32 log2_line_len)
+{
+	int way, set;
+	u32 setway;
+
+	/*
+	 * For optimal assembly code:
+	 *	a. count down
+	 *	b. have bigger loop inside
+	 */
+	for (way = num_ways - 1; way >= 0 ; way--) {
+		for (set = num_sets - 1; set >= 0; set--) {
+			setway = (level << 1) | (set << log2_line_len) |
+				 (way << way_shift);
+			/*
+			 * Clean & Invalidate data/unified
+			 * cache line by set/way
+			 */
+			asm volatile ("	mcr p15, 0, %0, c7, c14, 2"
+					: : "r" (setway));
+		}
+	}
+	/* DSB to make sure the operation is complete */
+	DSB;
+}
+
+static void v7_maint_dcache_level_setway(u32 level, u32 operation)
+{
+	u32 ccsidr;
+	u32 num_sets, num_ways, log2_line_len, log2_num_ways;
+	u32 way_shift;
+
+	set_csselr(level, ARMV7_CSSELR_IND_DATA_UNIFIED);
+
+	ccsidr = get_ccsidr();
+
+	log2_line_len = ((ccsidr & CCSIDR_LINE_SIZE_MASK) >>
+				CCSIDR_LINE_SIZE_OFFSET) + 2;
+	/* Converting from words to bytes */
+	log2_line_len += 2;
+
+	num_ways  = ((ccsidr & CCSIDR_ASSOCIATIVITY_MASK) >>
+			CCSIDR_ASSOCIATIVITY_OFFSET) + 1;
+	num_sets  = ((ccsidr & CCSIDR_NUM_SETS_MASK) >>
+			CCSIDR_NUM_SETS_OFFSET) + 1;
+	/*
+	 * According to ARMv7 ARM number of sets and number of ways need
+	 * not be a power of 2
+	 */
+	log2_num_ways = log_2_n_round_up(num_ways);
+
+	way_shift = (32 - log2_num_ways);
+	if (operation == ARMV7_DCACHE_INVAL_ALL) {
+		v7_inval_dcache_level_setway(level, num_sets, num_ways,
+					     way_shift, log2_line_len);
+	} else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL) {
+		v7_clean_inval_dcache_level_setway(level, num_sets, num_ways,
+						   way_shift, log2_line_len);
+	}
+}
+
+static void v7_maint_dcache_all(u32 operation)
+{
+	u32 level, cache_type, level_start_bit = 0;
+	u32 clidr = get_clidr();
+
+	for (level = 0; level < 7; level++) {
+		cache_type = (clidr >> level_start_bit) & 0x7;
+		if ((cache_type == ARMV7_CLIDR_CTYPE_DATA_ONLY) ||
+		    (cache_type == ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA) ||
+		    (cache_type == ARMV7_CLIDR_CTYPE_UNIFIED))
+			v7_maint_dcache_level_setway(level, operation);
+		level_start_bit += 3;
+	}
+}
+
 static void v7_dcache_clean_inval_range(u32 start, u32 stop, u32 line_len)
 {
 	u32 mva;
@@ -106,7 +218,7 @@  void invalidate_dcache_all(void)
  */
 void flush_dcache_all(void)
 {
-	v7_flush_dcache_all();
+	v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL);
 
 	v7_outer_cache_flush_all();
 }
diff --git a/arch/arm/cpu/armv7/cache_v7_asm.S b/arch/arm/cpu/armv7/cache_v7_asm.S
deleted file mode 100644
index a433628..0000000
--- a/arch/arm/cpu/armv7/cache_v7_asm.S
+++ /dev/null
@@ -1,154 +0,0 @@ 
-/*
- * SPDX-License-Identifier:	GPL-2.0+
- */
-
-#include <config.h>
-#include <linux/linkage.h>
-#include <linux/sizes.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_SYS_THUMB_BUILD
-#define ARM(x...)
-#define THUMB(x...)	x
-#else
-#define ARM(x...)	x
-#define THUMB(x...)
-#endif
-
-/*
- *	v7_flush_dcache_all()
- *
- *	Flush the whole D-cache.
- *
- *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
- *
- *	Note: copied from arch/arm/mm/cache-v7.S of Linux 4.4
- */
-ENTRY(__v7_flush_dcache_all)
-	dmb					@ ensure ordering with previous memory accesses
-	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	mov	r3, r0, lsr #23			@ move LoC into position
-	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
-	beq	finished			@ if loc is 0, then no need to clean
-start_flush_levels:
-	mov	r10, #0				@ start clean at cache level 0
-flush_levels:
-	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
-	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
-	and	r1, r1, #7			@ mask of the bits for current cache only
-	cmp	r1, #2				@ see what cache we have at this level
-	blt	skip				@ skip if no cache, or just i-cache
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
-	isb					@ isb to sych the new cssr&csidr
-	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
-	and	r2, r1, #7			@ extract the length of the cache lines
-	add	r2, r2, #4			@ add 4 (line length offset)
-	movw	r4, #0x3ff
-	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
-	clz	r5, r4				@ find bit position of way size increment
-	movw	r7, #0x7fff
-	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
-loop1:
-	mov	r9, r7				@ create working copy of max index
-loop2:
- ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
- THUMB(	lsl	r6, r4, r5		)
- THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
- ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
- THUMB(	lsl	r6, r9, r2		)
- THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
-	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
-	subs	r9, r9, #1			@ decrement the index
-	bge	loop2
-	subs	r4, r4, #1			@ decrement the way
-	bge	loop1
-skip:
-	add	r10, r10, #2			@ increment cache number
-	cmp	r3, r10
-	bgt	flush_levels
-finished:
-	mov	r10, #0				@ swith back to cache level 0
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
-	dsb	st
-	isb
-	bx	lr
-ENDPROC(__v7_flush_dcache_all)
-
-ENTRY(v7_flush_dcache_all)
- ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
-	bl	__v7_flush_dcache_all
- ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
-	bx	lr
-ENDPROC(v7_flush_dcache_all)
-
-/*
- *	v7_invalidate_dcache_all()
- *
- *	Invalidate the whole D-cache.
- *
- *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
- *
- *	Note: copied from __v7_flush_dcache_all above with
- *	mcr     p15, 0, r11, c7, c14, 2
- *	Replaced with:
- *	mcr     p15, 0, r11, c7, c6, 2
- */
-ENTRY(__v7_invalidate_dcache_all)
-	dmb					@ ensure ordering with previous memory accesses
-	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	mov	r3, r0, lsr #23			@ move LoC into position
-	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
-	beq	inval_finished			@ if loc is 0, then no need to clean
-	mov	r10, #0				@ start clean at cache level 0
-inval_levels:
-	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
-	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
-	and	r1, r1, #7			@ mask of the bits for current cache only
-	cmp	r1, #2				@ see what cache we have at this level
-	blt	inval_skip			@ skip if no cache, or just i-cache
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
-	isb					@ isb to sych the new cssr&csidr
-	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
-	and	r2, r1, #7			@ extract the length of the cache lines
-	add	r2, r2, #4			@ add 4 (line length offset)
-	movw	r4, #0x3ff
-	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
-	clz	r5, r4				@ find bit position of way size increment
-	movw	r7, #0x7fff
-	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
-inval_loop1:
-	mov	r9, r7				@ create working copy of max index
-inval_loop2:
- ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
- THUMB(	lsl	r6, r4, r5		)
- THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
- ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
- THUMB(	lsl	r6, r9, r2		)
- THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
-	mcr	p15, 0, r11, c7, c6, 2		@ invalidate by set/way
-	subs	r9, r9, #1			@ decrement the index
-	bge	inval_loop2
-	subs	r4, r4, #1			@ decrement the way
-	bge	inval_loop1
-inval_skip:
-	add	r10, r10, #2			@ increment cache number
-	cmp	r3, r10
-	bgt	inval_levels
-inval_finished:
-	mov	r10, #0				@ swith back to cache level 0
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
-	dsb	st
-	isb
-	bx	lr
-ENDPROC(__v7_invalidate_dcache_all)
-
-ENTRY(v7_invalidate_dcache_all)
- ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
-	bl	__v7_invalidate_dcache_all
- ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
-	bx	lr
-ENDPROC(v7_invalidate_dcache_all)
diff --git a/arch/arm/mach-uniphier/arm32/lowlevel_init.S b/arch/arm/mach-uniphier/arm32/lowlevel_init.S
index cc34116..6f05abf 100644
--- a/arch/arm/mach-uniphier/arm32/lowlevel_init.S
+++ b/arch/arm/mach-uniphier/arm32/lowlevel_init.S
@@ -38,7 +38,7 @@  ENTRY(lowlevel_init)
 	 * to do next is to create a page table and switch over to it.
 	 */
 	bl	create_page_table
-	bl	__v7_flush_dcache_all
+	bl	v7_flush_dcache_all
 
 	/* Disable MMU and Dcache before switching Page Table */
 	mrc	p15, 0, r0, c1, c0, 0	@ SCTLR (System Control Register)
@@ -140,3 +140,68 @@  ENTRY(create_page_table)
 	str	r0, [r12, #4]		@ mark the second section as Normal
 	mov	pc, lr
 ENDPROC(create_page_table)
+
+/* We don't use Thumb instructions for now */
+#define ARM(x...)	x
+#define THUMB(x...)
+
+/*
+ *	v7_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ *
+ *	- mm    - mm_struct describing address space
+ *
+ *	Note: copied from arch/arm/mm/cache-v7.S of Linux 4.4
+ */
+ENTRY(v7_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
+	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
+	mov	r10, #0				@ start clean at cache level 0
+flush_levels:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask of the bits for current cache only
+	cmp	r1, #2				@ see what cache we have at this level
+	blt	skip				@ skip if no cache, or just i-cache
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	isb					@ isb to sych the new cssr&csidr
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	movw	r4, #0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
+	clz	r5, r4				@ find bit position of way size increment
+	movw	r7, #0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+loop1:
+	mov	r9, r7				@ create working copy of max index
+loop2:
+ ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
+ THUMB(	lsl	r6, r4, r5		)
+ THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
+ ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
+ THUMB(	lsl	r6, r9, r2		)
+ THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
+	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
+	subs	r9, r9, #1			@ decrement the index
+	bge	loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	loop1
+skip:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	flush_levels
+finished:
+	mov	r10, #0				@ swith back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb	st
+	isb
+	mov	pc, lr
+ENDPROC(v7_flush_dcache_all)