[3/4] core/opal: Emergency stack for re-entry

Message ID 20180408064939.18879-4-npiggin@gmail.com
State Accepted
Headers show
Series
  • next round of OPAL debugging improvements
Related show

Commit Message

Nicholas Piggin April 8, 2018, 6:49 a.m.
This detects OPAL being re-entered by the OS, and switches to an
emergency stack if it was. This protects the firmware's main stack
from re-entrancy and allows the OS to use NMI facilities for crash
/ debug functionality.

Further nested re-entry will destroy the previous emergency stack
and prevent returning, but those should be rare cases.

This stack is sized at 16kB, which doubles the size of CPU stacks,
so as not to introduce a regression in primary stack size. The 16kB
stack originally had a 4kB machine check stack at the top, which was
removed by 80eee1946 ("opal: Remove machine check interrupt patching
in OPAL."). So it is possible the size could be tightened again, but
that would require further analysis.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 asm/head.S        | 22 ++++++++++++++++++----
 core/cpu.c        | 13 ++++++++-----
 core/stack.c      | 13 +++++++++++--
 include/cpu.h     |  1 +
 include/mem-map.h | 10 ++++++----
 include/stack.h   |  9 ++++++++-
 6 files changed, 52 insertions(+), 16 deletions(-)

Patch

diff --git a/asm/head.S b/asm/head.S
index eeefcaa4..48d3acb0 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -30,12 +30,17 @@ 
 
 #define PPC_INST_STOP		.long 0x4c0002e4
 
-#define GET_STACK(stack_reg,pir_reg)				\
-	sldi	stack_reg,pir_reg,STACK_SHIFT;			\
-	addis	stack_reg,stack_reg,CPU_STACKS_OFFSET@ha;	\
+#define GET_STACK(stack_reg,pir_reg)					\
+	sldi	stack_reg,pir_reg,STACK_SHIFT;				\
+	addis	stack_reg,stack_reg,CPU_STACKS_OFFSET@ha;		\
 	addi	stack_reg,stack_reg,CPU_STACKS_OFFSET@l;
 
-#define GET_CPU()						\
+#define GET_EMERGENCY_STACK(stack_reg,pir_reg)				\
+	sldi	stack_reg,pir_reg,STACK_SHIFT;				\
+	addis	stack_reg,stack_reg,EMERGENCY_CPU_STACKS_OFFSET@ha;	\
+	addi	stack_reg,stack_reg,EMERGENCY_CPU_STACKS_OFFSET@l;
+
+#define GET_CPU()							\
 	clrrdi	%r13,%r1,STACK_SHIFT
 
 #define SAVE_GPR(reg,sp)	std %r##reg,STACK_GPR##reg(sp)
@@ -1013,8 +1018,17 @@  opal_entry:
 	b	1b
 
 4:	/* Quiesce protocol done, get our per CPU stack */
+	/* Emergency stack if we have re-entered OPAL */
+	lwz	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+	cmpwi	%r11,1
+
 	mfspr	%r12,SPR_PIR
+	beq	5f
 	GET_STACK(%r12,%r12)
+	b	6f
+5:
+	GET_EMERGENCY_STACK(%r12,%r12)
+6:
 	stdu	%r12,-STACK_FRAMESIZE(%r12)
 
 	/* Save caller r1, establish new r1 */
diff --git a/core/cpu.c b/core/cpu.c
index 6826fee0..eb406ae2 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -79,15 +79,18 @@  unsigned long __attrconst cpu_stack_bottom(unsigned int pir)
 
 unsigned long __attrconst cpu_stack_top(unsigned int pir)
 {
-	/* This is the top of the MC stack which is above the normal
-	 * stack, which means a SP between cpu_stack_bottom() and
-	 * cpu_stack_top() can either be a normal stack pointer or
-	 * a Machine Check stack pointer
-	 */
+	/* This is the top of the normal stack. */
 	return ((unsigned long)&cpu_stacks[pir]) +
 		NORMAL_STACK_SIZE - STACK_TOP_GAP;
 }
 
+unsigned long __attrconst cpu_emergency_stack_top(unsigned int pir)
+{
+	/* This is the top of the emergency stack, above the normal stack. */
+	return ((unsigned long)&cpu_stacks[pir]) +
+		NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE - STACK_TOP_GAP;
+}
+
 static void cpu_wake(struct cpu_thread *cpu)
 {
 	/* Is it idle ? If not, no need to wake */
diff --git a/core/stack.c b/core/stack.c
index 10118e42..73700ce5 100644
--- a/core/stack.c
+++ b/core/stack.c
@@ -71,7 +71,7 @@  void ___print_backtrace(unsigned int pir, struct bt_entry *entries,
 	static char bt_text_buf[4096];
 	int i, l = 0, max;
 	char *buf = out_buf;
-	unsigned long bottom, top, tbot, ttop;
+	unsigned long bottom, top, normal_top, tbot, ttop;
 	char mark;
 
 	if (!out_buf) {
@@ -81,7 +81,8 @@  void ___print_backtrace(unsigned int pir, struct bt_entry *entries,
 		max = *len - 1;
 
 	bottom = cpu_stack_bottom(pir);
-	top = cpu_stack_top(pir);
+	normal_top = cpu_stack_top(pir);
+	top = cpu_emergency_stack_top(pir);
 	tbot = SKIBOOT_BASE;
 	ttop = (unsigned long)&_etext;
 
@@ -89,6 +90,8 @@  void ___print_backtrace(unsigned int pir, struct bt_entry *entries,
 	for (i = 0; i < count && l < max; i++) {
 		if (entries->sp < bottom || entries->sp > top)
 			mark = '!';
+		else if (entries->sp > normal_top)
+			mark = 'E';
 		else if (entries->pc < tbot || entries->pc > ttop)
 			mark = '*';
 		else
@@ -161,6 +164,12 @@  void __nomcount __mcount_stack_check(uint64_t sp, uint64_t lr)
 	int64_t mark = sp - bot;
 	uint64_t top = base + NORMAL_STACK_SIZE;
 
+	/*
+	 * Don't check the emergency stack just yet.
+	 */
+	if (c->in_opal_call > 1)
+		return;
+
 	/*
 	 * Don't re-enter on this CPU or don't enter at all if somebody
 	 * has spotted an overflow
diff --git a/include/cpu.h b/include/cpu.h
index 92ba83e8..d40aff96 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -311,6 +311,7 @@  static inline void cpu_give_self_os(void)
 
 extern unsigned long __attrconst cpu_stack_bottom(unsigned int pir);
 extern unsigned long __attrconst cpu_stack_top(unsigned int pir);
+extern unsigned long __attrconst cpu_emergency_stack_top(unsigned int pir);
 
 extern void cpu_idle_job(void);
 extern void cpu_idle_delay(unsigned long delay);
diff --git a/include/mem-map.h b/include/mem-map.h
index d2bc23fc..3e30d9b7 100644
--- a/include/mem-map.h
+++ b/include/mem-map.h
@@ -23,10 +23,10 @@ 
  */
 #define SKIBOOT_BASE		0x30000000
 
-/* Stack size set to 16K, some of it will be used for
- * machine check (see stack.h)
+/* Stack size set to 32K, 16K for general stack and 16K for an emergency
+ * stack.
  */
-#define STACK_SHIFT		14
+#define STACK_SHIFT		15
 #define STACK_SIZE		(1 << STACK_SHIFT)
 
 /* End of the exception region we copy from 0x0. 0x0-0x100 will have
@@ -109,7 +109,9 @@ 
  * each stack is STACK_SIZE in size (naturally aligned power of
  * two) and the bottom of the stack contains the cpu thread
  * structure for the processor, so it can be obtained by a simple
- * bit mask from the stack pointer.
+ * bit mask from the stack pointer. Within the CPU stack is divided
+ * into a normal and emergency stack to cope with a single level of
+ * re-entrancy.
  *
  * The size of this array is dynamically determined at boot time
  */
diff --git a/include/stack.h b/include/stack.h
index 4d3e504d..a41a4a91 100644
--- a/include/stack.h
+++ b/include/stack.h
@@ -28,12 +28,19 @@ 
 #define STACK_TOP_GAP		0x100
 
 /* Remaining stack space (gap included) */
-#define NORMAL_STACK_SIZE	STACK_SIZE
+#define NORMAL_STACK_SIZE	(STACK_SIZE/2)
+
+/* Emergency (re-entry) stack size */
+#define EMERGENCY_STACK_SIZE	(STACK_SIZE/2)
 
 /* Offset to get to normal CPU stacks */
 #define CPU_STACKS_OFFSET	(CPU_STACKS_BASE + \
 				 NORMAL_STACK_SIZE - STACK_TOP_GAP)
 
+/* Offset to get to emergency CPU stacks */
+#define EMERGENCY_CPU_STACKS_OFFSET	(CPU_STACKS_BASE + NORMAL_STACK_SIZE + \
+				 EMERGENCY_STACK_SIZE - STACK_TOP_GAP)
+
 /* Gap below the stack. If our stack checker sees the stack below that
  * gap, it will flag a stack overflow
  */