Patchwork [11/39] powerpc: endian safe trampoline

login
register
mail settings
Submitter Anton Blanchard
Date Sept. 23, 2013, 2:04 a.m.
Message ID <1379901913-5945-12-git-send-email-anton@samba.org>
Download mbox | patch
Permalink /patch/277054/
State Accepted, archived
Commit 5c0484e25ec03243d4c2f2d4416d4a13efc77f6a
Delegated to: Benjamin Herrenschmidt
Headers show

Comments

Anton Blanchard - Sept. 23, 2013, 2:04 a.m.
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Create a trampoline that works in either endian and flips to
the expected endian. Use it for primary and secondary thread
entry as well as RTAS and OF call return.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Anton Blanchard <anton@samba.org>
---
 arch/powerpc/include/asm/ppc_asm.h | 31 ++++++++++++++++++++++++++++++-
 arch/powerpc/kernel/entry_64.S     | 36 ++++++++++++++++++++----------------
 arch/powerpc/kernel/head_64.S      |  3 +++
 3 files changed, 53 insertions(+), 17 deletions(-)
Olof Johansson - Dec. 28, 2013, 7:24 a.m.
Nothing like some holiday powerpc early boot debugging. :-)

On Sun, Sep 22, 2013 at 7:04 PM, Anton Blanchard <anton@samba.org> wrote:
> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>
> Create a trampoline that works in either endian and flips to
> the expected endian. Use it for primary and secondary thread
> entry as well as RTAS and OF call return.
>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Signed-off-by: Anton Blanchard <anton@samba.org>

Some users reported failure with their kernel config to build and run
3.13-rc5. pasemi_defconfig boots just fine, but it seems that
ppc64_defconfig does not.

I've bisected it down to this particular patch. More below.


> diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
> index 0c51fb4..ce05bba 100644
> --- a/arch/powerpc/include/asm/ppc_asm.h
> +++ b/arch/powerpc/include/asm/ppc_asm.h
> @@ -845,6 +845,35 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
>  #define N_SLINE        68
>  #define N_SO   100
>
> -#endif /*  __ASSEMBLY__ */
> +/*
> + * Create an endian fixup trampoline
> + *
> + * This starts with a "tdi 0,0,0x48" instruction which is
> + * essentially a "trap never", and thus akin to a nop.
> + *
> + * The opcode for this instruction read with the wrong endian
> + * however results in a b . + 8
> + *
> + * So essentially we use that trick to execute the following
> + * trampoline in "reverse endian" if we are running with the
> + * MSR_LE bit set the "wrong" way for whatever endianness the
> + * kernel is built for.
> + */
>
> +#ifdef CONFIG_PPC_BOOK3E
> +#define FIXUP_ENDIAN
> +#else
> +#define FIXUP_ENDIAN                                              \
> +       tdi   0,0,0x48;   /* Reverse endian of b . + 8          */ \
> +       b     $+36;       /* Skip trampoline if endian is good  */ \
> +       .long 0x05009f42; /* bcl 20,31,$+4                      */ \
> +       .long 0xa602487d; /* mflr r10                           */ \
> +       .long 0x1c004a39; /* addi r10,r10,28                    */ \
> +       .long 0xa600607d; /* mfmsr r11                          */ \
> +       .long 0x01006b69; /* xori r11,r11,1                     */ \
> +       .long 0xa6035a7d; /* mtsrr0 r10                         */ \
> +       .long 0xa6037b7d; /* mtsrr1 r11                         */ \
> +       .long 0x2400004c  /* rfid                               */

So, the combination of this sequence and its execution at __start
seems to be what's causing problems for me. I've both commented out
and nopped out the second instruction and the handcoded LE opcodes,
and it makes no difference -- it seems to be the execution of the tdi
that traps or causes some other problem. And yeah -- it definitely
shouldn't since TO is 0. Likewise if I replace the tdi with nop and
keep the rest it runs fine.

I'm a bit baffled that there's a difference based on what config is
booted. I'm booting a raw vmlinux from a BE firmware. Initial code
sequence is largely the same for both configs.

Unfortunately I no longer have a JTAG environment such that I can
check the value of r0 at the time of tdi execution. Still, based on
docs it shouldn't matter since there are no TO bits set. I don't see a
rational reason to why the above should fail. And indeed, if I load a
known-different value and set TO=4, I still see failures. Etc.

I also tried moving the FIXUP_ENDIAN call until after the branch to
__start_initialization_multiplatform but before 64-bit enable and
there it runs just fine.

So, I'm really quite baffled right now. Any ideas would be welcome,
I'm sort of stumped. In particular since pasemi_defconfig boots fine.


-Olof
Benjamin Herrenschmidt - Dec. 28, 2013, 7:58 a.m.
On Fri, 2013-12-27 at 23:24 -0800, Olof Johansson wrote:
> 
> So, I'm really quite baffled right now. Any ideas would be welcome,
> I'm sort of stumped. In particular since pasemi_defconfig boots fine.

So you are coming in with "good" endian... what if you stick an
isync in there just for fun ? I wonder if it prefetches the "bad"
instructions and somewhat gets itself confused.

Also did you try sticking a b . around 0x700 see if that's where it's
going and what condition gets set ?

Cheers,
Ben.

Patch

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 0c51fb4..ce05bba 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -845,6 +845,35 @@  END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
 #define N_SLINE	68
 #define N_SO	100
 
-#endif /*  __ASSEMBLY__ */
+/*
+ * Create an endian fixup trampoline
+ *
+ * This starts with a "tdi 0,0,0x48" instruction which is
+ * essentially a "trap never", and thus akin to a nop.
+ *
+ * The opcode for this instruction read with the wrong endian
+ * however results in a b . + 8
+ *
+ * So essentially we use that trick to execute the following
+ * trampoline in "reverse endian" if we are running with the
+ * MSR_LE bit set the "wrong" way for whatever endianness the
+ * kernel is built for.
+ */
 
+#ifdef CONFIG_PPC_BOOK3E
+#define FIXUP_ENDIAN
+#else
+#define FIXUP_ENDIAN						   \
+	tdi   0,0,0x48;	  /* Reverse endian of b . + 8		*/ \
+	b     $+36;	  /* Skip trampoline if endian is good	*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x1c004a39; /* addi r10,r10,28			*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0xa6035a7d; /* mtsrr0 r10				*/ \
+	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
+	.long 0x2400004c  /* rfid				*/
+#endif /* !CONFIG_PPC_BOOK3E */
+#endif /*  __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PPC_ASM_H */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c04cdf7..889ea2b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1017,7 +1017,7 @@  _GLOBAL(enter_rtas)
 	
         li      r9,1
         rldicr  r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
-	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI
+	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
 	andc	r6,r0,r9
 	sync				/* disable interrupts so SRR0/1 */
 	mtmsrd	r0			/* don't get trashed */
@@ -1032,6 +1032,8 @@  _GLOBAL(enter_rtas)
 	b	.	/* prevent speculative execution */
 
 _STATIC(rtas_return_loc)
+	FIXUP_ENDIAN
+
 	/* relocation is off at this point */
 	GET_PACA(r4)
 	clrldi	r4,r4,2			/* convert to realmode address */
@@ -1103,28 +1105,30 @@  _GLOBAL(enter_prom)
 	std	r10,_CCR(r1)
 	std	r11,_MSR(r1)
 
-	/* Get the PROM entrypoint */
-	mtlr	r4
+	/* Put PROM address in SRR0 */
+	mtsrr0	r4
+
+	/* Setup our trampoline return addr in LR */
+	bcl	20,31,$+4
+0:	mflr	r4
+	addi	r4,r4,(1f - 0b)
+       	mtlr	r4
 
-	/* Switch MSR to 32 bits mode
+	/* Prepare a 32-bit mode big endian MSR
 	 */
 #ifdef CONFIG_PPC_BOOK3E
 	rlwinm	r11,r11,0,1,31
-	mtmsr	r11
+	mtsrr1	r11
+	rfi
 #else /* CONFIG_PPC_BOOK3E */
-        mfmsr   r11
-        li      r12,1
-        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-        andc    r11,r11,r12
-        li      r12,1
-        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
-        andc    r11,r11,r12
-        mtmsrd  r11
+	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
+	andc	r11,r11,r12
+	mtsrr1	r11
+	rfid
 #endif /* CONFIG_PPC_BOOK3E */
-        isync
 
-	/* Enter PROM here... */
-	blrl
+1:	/* Return from OF */
+	FIXUP_ENDIAN
 
 	/* Just make sure that r1 top 32 bits didn't get
 	 * corrupt by OF
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3d11d80..2ae41ab 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -68,6 +68,7 @@  _stext:
 _GLOBAL(__start)
 	/* NOP this out unconditionally */
 BEGIN_FTR_SECTION
+	FIXUP_ENDIAN
 	b	.__start_initialization_multiplatform
 END_FTR_SECTION(0, 1)
 
@@ -115,6 +116,7 @@  __run_at_load:
  */
 	.globl	__secondary_hold
 __secondary_hold:
+	FIXUP_ENDIAN
 #ifndef CONFIG_PPC_BOOK3E
 	mfmsr	r24
 	ori	r24,r24,MSR_RI
@@ -205,6 +207,7 @@  _GLOBAL(generic_secondary_thread_init)
  * as SCOM before entry).
  */
 _GLOBAL(generic_secondary_smp_init)
+	FIXUP_ENDIAN
 	mr	r24,r3
 	mr	r25,r4