diff mbox series

[5/5] hw/phb4: Enable error interrupts

Message ID 20200121072838.2771-5-oohall@gmail.com
State Accepted
Headers show
Series [1/5] hw/phb4: Make error interrupt handler compile | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch master (d75e82dbfbb9443efeb3f9a5921ac23605aab469)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Oliver O'Halloran Jan. 21, 2020, 7:28 a.m. UTC
In PHB4 the PHB's error and informational interrupts were changed to behave
more like actual LSIs. On PHB3 these interrupts would only be raised on
a 0 -> 1 transition of an error status bit (i.e. they were rising edge
triggered). On PHB4 the error interrupts are "true" LSIs and will be
re-raised as long as the underlying error status bit is set.

This causes a headache for us because OPAL's PHB error handling model
requires Skiboot to preserve the state of the PHB (including errors) until
the kernel is ready to handle the error. As a result we can't do anything
in Skiboot to handle the interrupt condition and we need to mask the error
internally.  We can do this by clearing the relevant bits in the IRQ_ENABLE
registers of the PHB.

It's worth pointing out that we don't want to mask the interrupt by setting
the Q bit in the XIVE ESBs. The ESBs are owned by the OS which may be
masking and unmasking the interrupt for its own reasons (e.g. migrating
IRQs). Skiboot modifying the ESB state could potentially cause problems and
should be avoided.

Cc: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 hw/phb4.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/hw/phb4.c b/hw/phb4.c
index 5f8b9b8f7ac8..da3a965903b4 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -114,7 +114,7 @@ 
 #include <nvram.h>
 
 /* Enable this to disable error interrupts for debug purposes */
-#define DISABLE_ERR_INTS
+#undef DISABLE_ERR_INTS
 
 static void phb4_init_hw(struct phb4 *p);
 
@@ -3511,6 +3511,33 @@  static void phb4_int_unmask_all(struct phb4 *p)
 	out_be64(p->regs + PHB_RXE_TCE_ERR_IRQ_ENABLE, 0x60510050c0000000ull);
 }
 
+/*
+ * Clear the IRQ_ENABLE bit of every currently set error status bit. This
+ * keeps the PHB's level-triggered ERR and INF interrupts from being re-fired
+ * before the kernel can handle the underlying condition.
+ */
+static void phb4_int_mask_active(struct phb4 *p)
+{
+	const uint64_t error_regs[] = {
+		PHB_ERR_STATUS,
+		PHB_TXE_ERR_STATUS,
+		PHB_RXE_ARB_ERR_STATUS,
+		PHB_RXE_MRG_ERR_STATUS,
+		PHB_RXE_TCE_ERR_STATUS
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(error_regs); i++) {
+		uint64_t stat, mask;
+
+		/* The IRQ_ENABLE reg is always at offset 0x20 from the status reg */
+		stat = phb4_read_reg(p, error_regs[i]);
+		mask = phb4_read_reg(p, error_regs[i] + 0x20);
+
+		phb4_write_reg(p, error_regs[i] + 0x20, mask & ~stat);
+	}
+}
+
 static uint64_t phb4_get_pesta(struct phb4 *p, uint64_t pe_number)
 {
 	uint64_t pesta;
@@ -3828,6 +3855,14 @@  static int64_t phb4_eeh_next_error(struct phb *phb,
 		*severity = OPAL_EEH_SEV_NO_ERROR;
 		phb4_set_err_pending(p, false);
 	}
+
+	/*
+	 * Unmask all our error interrupts once all pending errors
+	 * have been handled.
+	 */
+	if (!phb4_err_pending(p))
+		phb4_int_unmask_all(p);
+
 	return OPAL_SUCCESS;
 }
 
@@ -5588,6 +5623,9 @@  static void phb4_err_interrupt(struct irq_source *is, uint32_t isn)
 
 	PHBDBG(p, "Got interrupt 0x%08x\n", isn);
 
+	/* mask the interrupt conditions to prevent it from re-firing */
+	phb4_int_mask_active(p);
+
 	/* Update pending event */
 	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
 				OPAL_EVENT_PCI_ERROR);