diff mbox series

[v2] powerpc/64: irq replay remove decrementer overflow check

Message ID 20201107014336.2337337-1-npiggin@gmail.com
State Accepted
Commit 59d512e4374b2d8a6ad341475dc94c4a4bdec7d3
Headers show
Series [v2] powerpc/64: irq replay remove decrementer overflow check | expand

Checks

Context Check Description
snowpatch_ozlabs/needsstable success Patch has no Fixes tags
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 108 lines checked
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (f924bf2a58108816db1c95e08af4b1f99c90afa3)

Commit Message

Nicholas Piggin Nov. 7, 2020, 1:43 a.m. UTC
This is way to catch some cases of decrementer overflow, when the
decrementer has underflowed an odd number of times, while MSR[EE] was
disabled.

With a typical small decrementer, a timer that fires when MSR[EE] is
disabled will be "lost" if MSR[EE] remains disabled for between 4.3 and
8.6 seconds after the timer expires. In any case, the decrementer
interrupt would be taken at 8.6 seconds and the timer would be found at
that point.

So this check is for catching extreme latency events, and it prevents
those latencies from being a further few seconds long.  It's not obvious
this is a good tradeoff. This is already a watchdog magnitude event and
that situation is not improved a significantly with this check. For
large decrementers, it's useless.

Therefore remove this check, which avoids a mftb when enabling hard
disabled interrupts (e.g., when enabling after coming from hardware
interrupt handlers). Perhaps more importantly, it also removes the
clunky MSR[EE] vs PACA_IRQ_HARD_DIS incoherency in soft-interrupt replay
which simplifies the code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
v2: Fixed 64e warnings

 arch/powerpc/kernel/irq.c             | 53 ++-------------------------
 arch/powerpc/kernel/time.c            |  9 ++---
 arch/powerpc/platforms/powernv/opal.c |  2 +-
 3 files changed, 8 insertions(+), 56 deletions(-)

Comments

Michael Ellerman Dec. 15, 2020, 10:49 a.m. UTC | #1
On Sat, 7 Nov 2020 11:43:36 +1000, Nicholas Piggin wrote:
> This is way to catch some cases of decrementer overflow, when the
> decrementer has underflowed an odd number of times, while MSR[EE] was
> disabled.
> 
> With a typical small decrementer, a timer that fires when MSR[EE] is
> disabled will be "lost" if MSR[EE] remains disabled for between 4.3 and
> 8.6 seconds after the timer expires. In any case, the decrementer
> interrupt would be taken at 8.6 seconds and the timer would be found at
> that point.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/64: irq replay remove decrementer overflow check
      https://git.kernel.org/powerpc/c/59d512e4374b2d8a6ad341475dc94c4a4bdec7d3

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 7d0f7682d01d..6b1eca53e36c 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -102,14 +102,6 @@  static inline notrace unsigned long get_irq_happened(void)
 	return happened;
 }
 
-static inline notrace int decrementer_check_overflow(void)
-{
-	u64 now = get_tb();
-	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
- 
-	return now >= *next_tb;
-}
-
 #ifdef CONFIG_PPC_BOOK3E
 
 /* This is called whenever we are re-enabling interrupts
@@ -142,35 +134,6 @@  notrace unsigned int __check_irq_replay(void)
 	trace_hardirqs_on();
 	trace_hardirqs_off();
 
-	/*
-	 * We are always hard disabled here, but PACA_IRQ_HARD_DIS may
-	 * not be set, which means interrupts have only just been hard
-	 * disabled as part of the local_irq_restore or interrupt return
-	 * code. In that case, skip the decrementr check becaus it's
-	 * expensive to read the TB.
-	 *
-	 * HARD_DIS then gets cleared here, but it's reconciled later.
-	 * Either local_irq_disable will replay the interrupt and that
-	 * will reconcile state like other hard interrupts. Or interrupt
-	 * retur will replay the interrupt and in that case it sets
-	 * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S).
-	 */
-	if (happened & PACA_IRQ_HARD_DIS) {
-		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-
-		/*
-		 * We may have missed a decrementer interrupt if hard disabled.
-		 * Check the decrementer register in case we had a rollover
-		 * while hard disabled.
-		 */
-		if (!(happened & PACA_IRQ_DEC)) {
-			if (decrementer_check_overflow()) {
-				local_paca->irq_happened |= PACA_IRQ_DEC;
-				happened |= PACA_IRQ_DEC;
-			}
-		}
-	}
-
 	if (happened & PACA_IRQ_DEC) {
 		local_paca->irq_happened &= ~PACA_IRQ_DEC;
 		return 0x900;
@@ -186,6 +149,9 @@  notrace unsigned int __check_irq_replay(void)
 		return 0x280;
 	}
 
+	if (happened & PACA_IRQ_HARD_DIS)
+		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
 	/* There should be nothing left ! */
 	BUG_ON(local_paca->irq_happened != 0);
 
@@ -229,18 +195,6 @@  void replay_soft_interrupts(void)
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		WARN_ON_ONCE(mfmsr() & MSR_EE);
 
-	if (happened & PACA_IRQ_HARD_DIS) {
-		/*
-		 * We may have missed a decrementer interrupt if hard disabled.
-		 * Check the decrementer register in case we had a rollover
-		 * while hard disabled.
-		 */
-		if (!(happened & PACA_IRQ_DEC)) {
-			if (decrementer_check_overflow())
-				happened |= PACA_IRQ_DEC;
-		}
-	}
-
 	/*
 	 * Force the delivery of pending soft-disabled interrupts on PS3.
 	 * Any HV call will have this side effect.
@@ -345,6 +299,7 @@  notrace void arch_local_irq_restore(unsigned long mask)
 		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 			WARN_ON_ONCE(!(mfmsr() & MSR_EE));
 		__hard_irq_disable();
+		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 	} else {
 		/*
 		 * We should already be hard disabled here. We had bugs
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 74efe46f5532..7d372ff3504b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -552,14 +552,11 @@  void timer_interrupt(struct pt_regs *regs)
 	struct pt_regs *old_regs;
 	u64 now;
 
-	/* Some implementations of hotplug will get timer interrupts while
-	 * offline, just ignore these and we also need to set
-	 * decrementers_next_tb as MAX to make sure __check_irq_replay
-	 * don't replay timer interrupt when return, otherwise we'll trap
-	 * here infinitely :(
+	/*
+	 * Some implementations of hotplug will get timer interrupts while
+	 * offline, just ignore these.
 	 */
 	if (unlikely(!cpu_online(smp_processor_id()))) {
-		*next_tb = ~(u64)0;
 		set_dec(decrementer_max);
 		return;
 	}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index d95954ad4c0a..c61c3b62c8c6 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -731,7 +731,7 @@  int opal_hmi_exception_early2(struct pt_regs *regs)
 	return 1;
 }
 
-/* HMI exception handler called in virtual mode during check_irq_replay. */
+/* HMI exception handler called in virtual mode when irqs are next enabled. */
 int opal_handle_hmi_exception(struct pt_regs *regs)
 {
 	/*