diff mbox series

[RFC] skiboot machine check handler

Message ID 20191211100118.544-1-npiggin@gmail.com (mailing list archive)
State Not Applicable
Headers show
Series [RFC] skiboot machine check handler | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch powerpc/merge (42159d2de18ffa66c2714d988a8c162db8b03956)
snowpatch_ozlabs/apply_patch warning Failed to apply on branch powerpc/next (7794b1d4185e2587af46435e3e2f6696dae314c7)
snowpatch_ozlabs/apply_patch warning Failed to apply on branch linus/master (6794862a16ef41f753abd75c03a152836e4c8028)
snowpatch_ozlabs/apply_patch warning Failed to apply on branch powerpc/fixes (249fad734a25889a4f23ed014d43634af6798063)
snowpatch_ozlabs/apply_patch warning Failed to apply on branch linux-next (938f49c85b36076b19251b316eeaa5435c50ff6e)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Nicholas Piggin Dec. 11, 2019, 10:01 a.m. UTC
Provide facilities to decode machine checks into human readable
strings, with only sufficient information required to deal with
them sanely.

The old machine check stuff was over-engineered. The philosophy
here is that OPAL should correct anything it possibly can; what
it can't handle, but the OS might be able to do something with
(e.g., an uncorrected memory error or SLB multi-hit), it passes
back to Linux. Anything else, the OS doesn't care about. It doesn't
want a huge struct of severities, levels, originators, etc. that it
can't do anything with -- just provide human readable strings
for what happened and what was done with it.

A Linux driver for this will be able to cope with new processors.

This also uses the same facility to decode machine checks in OPAL
boot.

The code is a bit in flux because it's sitting on top of a few
other RFC patches and not quite complete, just wanted opinions
about it.
---
 core/Makefile.inc  |   2 +-
 core/exceptions.c  |  28 ++++-
 core/mce.c         | 306 +++++++++++++++++++++++++++++++++++++++++++++
 include/opal-api.h |  41 +++++-
 include/skiboot.h  |   6 +
 5 files changed, 379 insertions(+), 4 deletions(-)
 create mode 100644 core/mce.c

Comments

Mahesh J Salgaonkar Jan. 16, 2020, 7:03 a.m. UTC | #1
On 2019-12-11 20:01:18 Wed, Nicholas Piggin wrote:
> Provide facilities to decode machine checks into human readable
> strings, with only sufficient information required to deal with
> them sanely.
> 
> The old machine check stuff was over engineered. The philosophy
> here is that OPAL should correct anything it possibly can, what
> it can't handle but the OS might be able to do something with
> (e.g., uncorrected memory error or SLB multi-hit), it passes back
> to Linux. Anything else, the OS doesn't care. It doesn't want a
> huge struct of severities and levels and originators etc that it
> can't do anything with -- just provide human readable strings
> for what happened and what was done with it.
> 
> A Linux driver for this will be able to cope with new processors.
> 
> This also uses the same facility to decode machine checks in OPAL
> boot.
> 
> The code is a bit in flux because it's sitting on top of a few
> other RFC patches and not quite complete, just wanted opinions
> about it.

opal_handle_mce() may have to be treated as a special OPAL call. For an
MCE that occurs in OPAL context, Linux making an OPAL call will clobber
the original OPAL call stack which hit the MCE. The same is true for a
nested MCE in OPAL. Should it just continue using the same r1 to avoid
clobbering, or have a separate stack for the MCE OPAL call?

Thanks,
-Mahesh.
Nicholas Piggin Jan. 21, 2020, 7:54 a.m. UTC | #2
Mahesh J Salgaonkar's on January 16, 2020 5:03 pm:
> On 2019-12-11 20:01:18 Wed, Nicholas Piggin wrote:
>> Provide facilities to decode machine checks into human readable
>> strings, with only sufficient information required to deal with
>> them sanely.
>> 
>> The old machine check stuff was over engineered. The philosophy
>> here is that OPAL should correct anything it possibly can, what
>> it can't handle but the OS might be able to do something with
>> (e.g., uncorrected memory error or SLB multi-hit), it passes back
>> to Linux. Anything else, the OS doesn't care. It doesn't want a
>> huge struct of severities and levels and originators etc that it
>> can't do anything with -- just provide human readable strings
>> for what happened and what was done with it.
>> 
>> A Linux driver for this will be able to cope with new processors.
>> 
>> This also uses the same facility to decode machine checks in OPAL
>> boot.
>> 
>> The code is a bit in flux because it's sitting on top of a few
>> other RFC patches and not quite complete, just wanted opinions
>> about it.
> 
> opal_handle_mce() may have to be treated as special opal call. For MCE
> that occurs in OPAL context, Linux making opal call will clobber
> original opal call stack which hit MCE. Same is true with nested MCE in
> OPAL. Should it just continue using same r1 to avoid clobbering or have
> a separate stack for mce opal call ?

Ah, it wasn't clear in my message, sorry: this would only be made
available to kernels which use the new calling convention where the
kernel provides its own stack for OPAL to use.

That may be controversial itself, that's another RFC but if we went
ahead with that approach, then handling re-entrant interrupts like
this becomes easy because Linux does all the hard work with NMI/MCE
stacks etc.

Thanks,
Nick
diff mbox series

Patch

diff --git a/core/Makefile.inc b/core/Makefile.inc
index c2b5251d7..cc90fb958 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -7,7 +7,7 @@  CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o vm.o
 CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
 CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
 CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
-CORE_OBJS += vpd.o platform.o nvram.o nvram-format.o hmi.o
+CORE_OBJS += vpd.o platform.o nvram.o nvram-format.o hmi.o mce.o
 CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
 CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
 CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
diff --git a/core/exceptions.c b/core/exceptions.c
index 66e8953ce..b04d15125 100644
--- a/core/exceptions.c
+++ b/core/exceptions.c
@@ -32,6 +32,7 @@  static void dump_regs(struct stack_frame *stack)
 
 #define EXCEPTION_MAX_STR 320
 
+#if 0
 static void print_recoverable_mce_vm(struct stack_frame *stack, uint64_t nip, uint64_t msr)
 {
 	char buf[EXCEPTION_MAX_STR];
@@ -46,6 +47,7 @@  static void print_recoverable_mce_vm(struct stack_frame *stack, uint64_t nip, ui
 	dump_regs(stack);
 	prerror("Continuing with VM off\n");
 }
+#endif
 
 void exception_entry(struct stack_frame *stack)
 {
@@ -103,7 +105,11 @@  void exception_entry(struct stack_frame *stack)
 		}
 		break;
 
-	case 0x200:
+	case 0x200: {
+		uint64_t mce_flags, mce_addr;
+		const char *mce_err;
+
+#if 0
 		if (this_cpu()->vm_local_map_inuse)
 			fatal = true; /* local map is non-linear */
 
@@ -114,12 +120,29 @@  void exception_entry(struct stack_frame *stack)
 			stack->srr1 &= ~(MSR_IR|MSR_DR);
 			goto out;
 		}
+#endif
 
 		fatal = true;
 		prerror("***********************************************\n");
 		l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
 			"Fatal MCE at "REG"   ", nip);
-		break;
+		l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip);
+		l += snprintf(buf + l, EXCEPTION_MAX_STR - l, "  MSR "REG, msr);
+		prerror("%s\n", buf);
+
+		/*
+		 * Decode the cause from the saved SRR0/SRR1/DSISR/DAR and
+		 * print it on a line of its own. The outputs are given
+		 * defaults first so the prints below never read an unset
+		 * string pointer or address if the decoder returns early.
+		 */
+		mce_err = "unknown error";
+		mce_addr = 0;
+		decode_mce(stack->srr0, stack->srr1, stack->dsisr, stack->dar,
+				&mce_flags, &mce_err, &mce_addr);
+		l = 0;
+		l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+			"Cause: %s", mce_err);
+		prerror("%s\n", buf);
+		if (mce_flags & MCE_INVOLVED_EA) {
+			/*
+			 * Start again at the beginning of buf: appending at
+			 * the previous offset would print the "Cause: ..."
+			 * text a second time in front of the address.
+			 */
+			l = 0;
+			l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+				"Effective address: 0x%016llx", mce_addr);
+			prerror("%s\n", buf);
+		}
+		goto no_symbol;
+	}
 
 	case 0x300:
 		if (vm_dsi(nip, stack->dar, !!(stack->dsisr & DSISR_ISSTORE)))
@@ -195,6 +218,7 @@  void exception_entry(struct stack_frame *stack)
 	l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip);
 	l += snprintf(buf + l, EXCEPTION_MAX_STR - l, "  MSR "REG, msr);
 	prerror("%s\n", buf);
+no_symbol:
 	dump_regs(stack);
 	backtrace_r1((uint64_t)stack);
 	if (fatal) {
diff --git a/core/mce.c b/core/mce.c
new file mode 100644
index 000000000..0ebf98380
--- /dev/null
+++ b/core/mce.c
@@ -0,0 +1,306 @@ 
+// SPDX-License-Identifier: Apache-2.0
+/*
+ * Deal with Machine Check Exceptions
+ *
+ * Copyright 2019 IBM Corp.
+ */
+
+#define pr_fmt(fmt)	"MCE: " fmt
+
+#include <skiboot.h>
+#include <opal.h>
+#include <processor.h>
+#include <cpu.h>
+#include <cpu.h>
+
+/*
+ * Recovery action used for CORRECTION_ERAT entries: executes a single
+ * "slbia 7" instruction.
+ * NOTE(review): presumably the IH=7 form is what invalidates the ERAT on
+ * POWER9 -- confirm against the ISA / processor user manual.
+ */
+static void flush_erat(void)
+{
+	asm volatile("slbia	7");
+}
+
+/*
+ * Recovery action used for CORRECTION_TLB entries.
+ *
+ * Placeholder: the body is empty (marked XXX), so nothing is flushed yet.
+ * opal_handle_mce() still sets MCE_HANDLE_CORRECTED after calling this.
+ */
+static void flush_tlb(void)
+{
+	/* XXX */
+}
+
+/*
+ * Recovery action used for CORRECTION_TLBIE entries.
+ *
+ * Placeholder: the body is empty (marked XXX), so nothing is flushed yet.
+ * opal_handle_mce() still sets MCE_HANDLE_CORRECTED | MCE_HANDLE_EMULATED
+ * after calling this.
+ */
+static void flush_all_tlb(void)
+{
+	/* XXX */
+}
+
+/*
+ * Non-zero when PPC_BIT(42) is set in SRR1. __decode_mce() uses this to
+ * choose the DSISR (load/store) table over the SRR1 (instruction fetch)
+ * table.
+ */
+#define SRR1_MC_LOADSTORE(srr1)	((srr1) & PPC_BIT(42))
+
+/*
+ * Recovery action attached to each decode table entry; opal_handle_mce()
+ * maps these to flush_erat(), flush_tlb() and flush_all_tlb().
+ */
+#define CORRECTION_NONE			0
+#define CORRECTION_ERAT			1
+#define CORRECTION_TLB			2
+#define CORRECTION_TLBIE		3
+
+/*
+ * One row of an SRR1-based decode table. A row matches when
+ * (srr1 & srr1_mask) == srr1_value. A row with srr1_mask == 0 ends the
+ * table.
+ */
+struct mce_ierror_table {
+	unsigned long srr1_mask;	/* SRR1 bits that take part in the match */
+	unsigned long srr1_value;	/* required value of those bits */
+	uint64_t type;			/* MCE_* bits reported for this error */
+	const char *error_str;		/* human readable description */
+	int correction_type;		/* one of the CORRECTION_* values */
+};
+
+/*
+ * SRR1 decode table, consulted by __decode_mce() when SRR1_MC_LOADSTORE()
+ * is zero. Every row uses the same mask (0x00000000081c0000), so at most
+ * one row can match a given SRR1 value. The final all-zero row is the
+ * terminator.
+ * NOTE(review): "p9" presumably means POWER9; the encodings should be
+ * checked against the processor user manual.
+ */
+static const struct mce_ierror_table mce_p9_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000,
+  MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_INVOLVED_EA,
+  "instruction fetch memory uncorrectable error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x0000000000080000,
+  MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
+  "instruction fetch SLB parity error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x00000000000c0000,
+  MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
+  "instruction fetch SLB multi-hit error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x0000000000100000,
+  MCE_INSNFETCH | MCE_INVOLVED_EA,
+  "instruction fetch ERAT multi-hit error",
+  CORRECTION_ERAT },
+{ 0x00000000081c0000, 0x0000000000140000,
+  MCE_INSNFETCH | MCE_INVOLVED_EA,
+  "instruction fetch TLB multi-hit error",
+  CORRECTION_TLB },
+{ 0x00000000081c0000, 0x0000000000180000,
+  MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+  "instruction fetch page table access memory uncorrectable error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x00000000001c0000,
+  MCE_INSNFETCH | MCE_INVOLVED_EA,
+  "instruction fetch to foreign address",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x0000000008000000,
+  MCE_INSNFETCH | MCE_INVOLVED_EA,
+  "instruction fetch foreign link time-out",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x0000000008040000,
+  MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+  "instruction fetch page table access foreign link time-out",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x00000000080c0000,
+  MCE_INSNFETCH | MCE_INVOLVED_EA,
+  "instruction fetch real address error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x0000000008100000,
+  MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+  "instruction fetch page table access real address error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x0000000008140000,
+  MCE_LOADSTORE | MCE_IMPRECISE,
+  "store real address asynchronous error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x0000000008180000,
+  MCE_LOADSTORE | MCE_IMPRECISE,
+  "store foreign link time-out asynchronous error",
+  CORRECTION_NONE },
+{ 0x00000000081c0000, 0x00000000081c0000,
+  MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+  "instruction fetch page table access to foreign address",
+  CORRECTION_NONE },
+{ 0 } };
+
+/*
+ * One row of a DSISR-based decode table. A row matches when any bit of
+ * dsisr_value is set in DSISR. A row with dsisr_value == 0 ends the table.
+ */
+struct mce_derror_table {
+	unsigned long dsisr_value;	/* DSISR bit(s) selecting this row */
+	uint64_t type;			/* MCE_* bits reported for this error */
+	const char *error_str;		/* human readable description */
+	int correction_type;		/* one of the CORRECTION_* values */
+};
+
+/*
+ * DSISR decode table, consulted by __decode_mce() when SRR1_MC_LOADSTORE()
+ * is non-zero. Each row tests a single DSISR bit, so several rows can
+ * match the same DSISR value. The final all-zero row is the terminator.
+ * NOTE(review): "p9" presumably means POWER9; the encodings should be
+ * checked against the processor user manual.
+ */
+static const struct mce_derror_table mce_p9_derror_table[] = {
+{ 0x00008000,
+  MCE_LOADSTORE | MCE_MEMORY_ERROR,
+  "load/store memory uncorrectable error",
+  CORRECTION_NONE },
+{ 0x00004000,
+  MCE_LOADSTORE | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+  "load/store page table access memory uncorrectable error",
+  CORRECTION_NONE },
+{ 0x00002000,
+  MCE_LOADSTORE | MCE_INVOLVED_EA,
+  "load/store foreign link time-out",
+  CORRECTION_NONE },
+{ 0x00001000,
+  MCE_LOADSTORE | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+  "load/store page table access foreign link time-out",
+  CORRECTION_NONE },
+{ 0x00000800,
+  MCE_LOADSTORE | MCE_INVOLVED_EA,
+  "load/store ERAT multi-hit error",
+  CORRECTION_ERAT },
+{ 0x00000400,
+  MCE_LOADSTORE | MCE_INVOLVED_EA,
+  "load/store TLB multi-hit error",
+  CORRECTION_TLB },
+{ 0x00000200,
+  MCE_LOADSTORE,
+  "TLBIE or TLBIEL instruction programming error",
+  CORRECTION_TLBIE },
+{ 0x00000100,
+  MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
+  "load/store SLB parity error", 
+  CORRECTION_NONE },
+{ 0x00000080,
+  MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
+  "load/store SLB multi-hit error",
+  CORRECTION_NONE },
+{ 0x00000040,
+  MCE_LOADSTORE | MCE_INVOLVED_EA,
+  "load real address error",
+  CORRECTION_NONE },
+{ 0x00000020,
+  MCE_LOADSTORE | MCE_TABLE_WALK,
+  "load/store page table access real address error",
+  CORRECTION_NONE },
+{ 0x00000010,
+  MCE_LOADSTORE,
+  "load/store to foreign address",
+  CORRECTION_NONE },
+{ 0x00000008,
+  MCE_LOADSTORE | MCE_TABLE_WALK,
+  "load/store page table access to foreign address",
+  CORRECTION_NONE },
+{ 0 } };
+
+/*
+ * Look up an SRR1 value in an SRR1 decode table.
+ *
+ * @table:           rows to search, ended by a row with srr1_mask == 0
+ * @srr1:            SRR1 value to decode
+ * @type:            receives the MCE_* bits of the matching row
+ * @error_str:       receives the description of the matching row
+ * @correction_type: receives the CORRECTION_* value of the matching row
+ *
+ * The whole table is always walked; if more than one row matches, the
+ * last one is reported. The outputs are left untouched when no row
+ * matches.
+ */
+static void decode_ierror(const struct mce_ierror_table table[],
+				uint64_t srr1,
+				uint64_t *type,
+				const char **error_str,
+				int *correction_type)
+{
+	const struct mce_ierror_table *row;
+
+	for (row = table; row->srr1_mask != 0; row++) {
+		if ((srr1 & row->srr1_mask) == row->srr1_value) {
+			*type = row->type;
+			*error_str = row->error_str;
+			*correction_type = row->correction_type;
+		}
+	}
+}
+
+/*
+ * Look up a DSISR value in a DSISR decode table.
+ *
+ * @table:           rows to search, ended by a row with dsisr_value == 0
+ * @dsisr:           DSISR value to decode
+ * @type:            receives the MCE_* bits of the matching row
+ * @error_str:       receives the description of the matching row
+ * @correction_type: receives the CORRECTION_* value of the matching row
+ *
+ * The whole table is always walked. When several DSISR bits are set, the
+ * row that appears last in the table is the one reported. The outputs
+ * are left untouched when no row matches.
+ * NOTE(review): confirm that last-match (rather than first-match) is the
+ * intended priority when multiple bits are set.
+ */
+static void decode_derror(const struct mce_derror_table table[],
+		uint32_t dsisr,
+		uint64_t *type,
+		const char **error_str,
+		int *correction_type)
+{
+	const struct mce_derror_table *row;
+
+	for (row = table; row->dsisr_value != 0; row++) {
+		if (dsisr & row->dsisr_value) {
+			*type = row->type;
+			*error_str = row->error_str;
+			*correction_type = row->correction_type;
+		}
+	}
+}
+
+/*
+ * Decode a machine check from the saved interrupt registers.
+ *
+ * @srr0, @srr1, @dsisr, @dar: register values captured at the interrupt
+ * @type:            receives MCE_* bits describing the error
+ * @error_str:       receives a static, human readable description
+ * @address:         receives DAR (load/store) or SRR0 (instruction fetch)
+ *                   when the decoded type has MCE_INVOLVED_EA, else 0
+ * @correction_type: receives the CORRECTION_* action for the error
+ *
+ * Every output is written on every return path.
+ */
+static void __decode_mce(uint64_t srr0, uint64_t srr1,
+			uint32_t dsisr, uint64_t dar,
+			uint64_t *type, const char **error_str,
+			uint64_t *address, int *correction_type)
+{
+	/*
+	 * Defaults first, so that neither the early return below nor a
+	 * table lookup without a match leaves an output unset.
+	 */
+	*type = MCE_UNKNOWN;
+	*error_str = "unknown error";
+	*address = 0;
+	*correction_type = CORRECTION_NONE;
+
+	/*
+	 * On POWER9 DD2.1 and below, it's possible to get a machine check
+	 * caused by a paste instruction where only DSISR bit 25 is set. This
+	 * will result in the MCE handler seeing an unknown event and the kernel
+	 * crashing. An MCE that occurs like this is spurious, so we don't need
+	 * to do anything in terms of servicing it. If there is something that
+	 * needs to be serviced, the CPU will raise the MCE again with the
+	 * correct DSISR so that it can be serviced properly. So detect this
+	 * case and mark it as handled.
+	 */
+	if (SRR1_MC_LOADSTORE(srr1) && dsisr == 0x02000000) {
+		*type = MCE_NO_ERROR;
+		*error_str = "spurious machine check (no error)";
+		return;
+	}
+
+	if (SRR1_MC_LOADSTORE(srr1)) {
+		decode_derror(mce_p9_derror_table,
+				dsisr, type, error_str, correction_type);
+		if (*type & MCE_INVOLVED_EA)
+			*address = dar;
+	} else {
+		decode_ierror(mce_p9_ierror_table,
+				srr1, type, error_str, correction_type);
+		if (*type & MCE_INVOLVED_EA)
+			*address = srr0;
+	}
+}
+
+/*
+ * Decode-only entry point for callers outside this file: forwards to
+ * __decode_mce() and throws away the correction action, so no recovery
+ * is attempted.
+ */
+void decode_mce(uint64_t srr0, uint64_t srr1,
+			uint32_t dsisr, uint64_t dar,
+			uint64_t *type, const char **error_str,
+			uint64_t *address)
+{
+	int ignored_correction;
+
+	__decode_mce(srr0, srr1, dsisr, dar,
+			type, error_str, address,
+			&ignored_correction);
+}
+
+/*
+ * Copy src into a caller-supplied buffer of len bytes and always leave
+ * the result NUL-terminated. Does nothing for a NULL or zero-length
+ * buffer.
+ */
+static void mce_copy_string(char *dst, uint64_t len, const char *src)
+{
+	if (!dst || !len)
+		return;
+
+	/* strncpy() alone does not terminate when src fills the buffer. */
+	strncpy(dst, src, len - 1);
+	dst[len - 1] = '\0';
+}
+
+/*
+ * OPAL_HANDLE_MCE: decode a machine check described by the OS and, if
+ * requested, attempt to correct it.
+ *
+ * @opal_mce: big-endian request/response block. flags, srr0, srr1, dsisr
+ *            and dar are read; flags, type and ea are written back, and
+ *            the description is copied to errorbuf (at most errorlen
+ *            bytes including the terminator). When MCE_HANDLE_CORRECTED
+ *            ends up set in flags, the action taken is copied to
+ *            handledbuf in the same way. The pa and insn fields are not
+ *            touched.
+ *
+ * Returns OPAL_UNSUPPORTED unless proc_gen is proc_gen_p9, otherwise
+ * OPAL_SUCCESS.
+ */
+static int64_t opal_handle_mce(struct opal_mce *opal_mce)
+{
+	uint64_t flags = be64_to_cpu(opal_mce->flags);
+	/* Defaults keep every output defined whatever the decoder does. */
+	int correction_type = CORRECTION_NONE;
+	uint64_t type = MCE_UNKNOWN;
+	uint64_t ea = 0;
+	const char *error_str = "unknown error";
+	const char *handled_str = "";
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	/* dsisr is a __be32 field, so it needs the 32-bit conversion. */
+	__decode_mce(be64_to_cpu(opal_mce->srr0),
+			be64_to_cpu(opal_mce->srr1),
+			be32_to_cpu(opal_mce->dsisr),
+			be64_to_cpu(opal_mce->dar),
+			&type, &error_str, &ea, &correction_type);
+
+	if (flags & MCE_HANDLE_CORRECT) {
+		/* Attempt to correct; the insn field is not used yet. */
+		if (correction_type == CORRECTION_ERAT) {
+			flush_erat();
+			flags |= MCE_HANDLE_CORRECTED;
+			handled_str = "ERAT flush";
+		} else if (correction_type == CORRECTION_TLB) {
+			flush_tlb();
+			flags |= MCE_HANDLE_CORRECTED;
+			handled_str = "TLB flush";
+		} else if (correction_type == CORRECTION_TLBIE) {
+			flush_all_tlb();
+			flags |= MCE_HANDLE_CORRECTED | MCE_HANDLE_EMULATED;
+			handled_str = "global TLB flush";
+		}
+	}
+
+	opal_mce->flags = cpu_to_be64(flags);
+	opal_mce->type = cpu_to_be64(type);
+	opal_mce->ea = cpu_to_be64(ea);
+
+	mce_copy_string((char *)be64_to_cpu(opal_mce->errorbuf),
+			be64_to_cpu(opal_mce->errorlen), error_str);
+
+	if (flags & MCE_HANDLE_CORRECTED)
+		mce_copy_string((char *)be64_to_cpu(opal_mce->handledbuf),
+				be64_to_cpu(opal_mce->handledlen),
+				handled_str);
+
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_HANDLE_MCE, opal_handle_mce, 1);
diff --git a/include/opal-api.h b/include/opal-api.h
index d7c2368a1..169061a26 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -230,7 +230,8 @@ 
 #define OPAL_GET_SYMBOL				181
 #define OPAL_LOOKUP_SYMBOL			182
 #define OPAL_REGISTER_OS_OPS			183
-#define OPAL_LAST				183
+#define OPAL_HANDLE_MCE				184
+#define OPAL_LAST				184
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
@@ -1264,6 +1265,44 @@  struct opal_os_ops {
         __be64  os_printf;      /* void printf(int32_t level, const char *str) */
 };
 
+/* Bits carried in the flags field of struct opal_mce (input and output). */
+#define MCE_HANDLE_CORRECT		0x0001	/* Attempt to correct */
+#define MCE_HANDLE_CORRECTED		0x1000	/* Correction of the error was attempted */
+#define MCE_HANDLE_EMULATED		0x2000	/* Should advance NIA */
+#define MCE_HANDLE_NEED_INSN		0x4000	/* Try again with insn field */
+
+
+/* Bits carried in the type field of struct opal_mce (output). */
+#define MCE_NO_ERROR			0x0001
+#define MCE_UNKNOWN			0x0002
+#define MCE_INSNFETCH			0x0004
+#define MCE_LOADSTORE			0x0008
+#define MCE_TABLE_WALK			0x0010
+#define MCE_IMPRECISE			0x0020
+#define MCE_MEMORY_ERROR		0x0040
+#define MCE_SLB_ERROR			0x0080
+#define MCE_INVOLVED_EA			0x0100
+#define MCE_INVOLVED_PA			0x0200
+
+/*
+ * Argument block for the OPAL_HANDLE_MCE call. All fields are big-endian.
+ * The caller fills in flags and the inputs; the call writes back flags,
+ * type and ea, and copies strings to the two buffers the caller supplies.
+ */
+struct opal_mce {
+	/* Input and output */
+	__be64	flags;	/* How it should be handled / how it was handled */
+
+	/* Inputs */
+	__be64	srr0;
+	__be64	srr1;
+	__be32	dsisr;	/* 32-bit, unlike the other register fields */
+	__be32	reserved;
+	__be64	dar;
+	__be64	insn;	/* Zero if instruction was not read */
+
+	/* Outputs */
+	__be64	type;	/* MCE_* type bits */
+	__be64	ea;	/* Effective address, 0 when none is involved */
+	__be64	pa;	/* NOTE(review): not written by the handler in this patch */
+	__be64	errorbuf;	/* pointer to buffer for string */
+	__be64	errorlen;	/* size of errorbuf in bytes */
+	__be64	handledbuf;	/* pointer to buffer for the action-taken string */
+	__be64	handledlen;	/* size of handledbuf in bytes */
+};
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/include/skiboot.h b/include/skiboot.h
index b2f4ec3ab..e96643cb0 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -181,6 +181,12 @@  extern char __sym_map_start[];
 extern char __sym_map_end[];
 extern size_t snprintf_symbol(char *buf, size_t len, uint64_t addr);
 
+/* RAS */
+/*
+ * Decode a machine check from SRR0/SRR1/DSISR/DAR without attempting any
+ * correction. On return *flags holds MCE_* bits, *error_str points to a
+ * description string and *address holds the effective address when
+ * MCE_INVOLVED_EA is set in *flags.
+ */
+void decode_mce(uint64_t srr0, uint64_t srr1,
+			uint32_t dsisr, uint64_t dar,
+			uint64_t *flags, const char **error_str,
+			uint64_t *address);
+
 /* Direct controls */
 extern void direct_controls_init(void);
 extern int64_t opal_signal_system_reset(int cpu_nr);