@@ -3179,8 +3179,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
}
}
- ice_irq_dynamic_ena(hw, NULL, NULL);
-
return IRQ_WAKE_THREAD;
}
@@ -3192,6 +3190,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
{
struct ice_pf *pf = data;
+ struct ice_hw *hw;
+
+ hw = &pf->hw;
if (ice_is_reset_in_progress(pf->state))
return IRQ_HANDLED;
@@ -3204,8 +3205,6 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
}
if (pf->oicr_misc & PFINT_OICR_TSYN_TX_M) {
- struct ice_hw *hw = &pf->hw;
-
/* Process outstanding Tx timestamps. If there is more work,
* re-arm the interrupt to trigger again.
*/
@@ -3217,6 +3216,8 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
pf->oicr_misc &= ~PFINT_OICR_TSYN_TX_M;
}
+ ice_irq_dynamic_ena(hw, NULL, NULL);
+
return IRQ_HANDLED;
}
At the end of ice_misc_intr() the driver calls ice_irq_dynamic_ena() to re-enable the miscellaneous interrupt. This is done before the ice_misc_intr_thread_fn can run and complete. According to the kernel function comment documentation for request_threaded_irq(), the interrupt should remain disabled until the thread function completes its task. By re-enabling the interrupt at the end of the hard IRQ, it is possible for a new interrupt to trigger while the thread function is processing. This is problematic for PTP Tx timestamps. For E822 devices, the hardware in the PHY keeps track of how many outstanding timestamps are generated and how many timestamps are read from the PHY. This counter is incremented once for each timestamp that is captured by hardware, and decremented once each time a timestamp is read from the PHY. The PHY will not generate a new interrupt unless this internal counter is zero before the most recently captured timestamp. Because of this counter behavior, a race with the hard IRQ and threaded IRQ function can result in the potential for the counter to get stuck such that no new interrupts will be triggered until the device is reset. Consider the following flow: 1 -> Tx timestamp completes in hardware 2 -> timestamp interrupt occurs 3 -> ice_misc_intr() re-enables timestamp interrupt, and wakes the thread_fn 4 -> thread_fn is running and processing Tx timestamp 5 -> the Tx timestamp is read from PHY, clearing the counter 6 -> a new Tx timestamp completes in hardware, triggering interrupt 7 -> the thread_fn hasn't exited and reported IRQ handled 8 -> ice_misc_intr() triggers and sees PTP interrupt, so tries to wake thread 9 -> thread_fn is already running (IRQTF_RUNTHREAD is set still!) so we skip running the thread... 10 -> an outstanding timestamp is remaining but we never read it 11 -> interrupt never triggers again The fix for this complicated race condition is simple: do not re-enable the miscellaneous interrupt until *after* the thread function completes. If a new timestamp event triggers while the interrupt is disabled, it will be remembered and should cause the interrupt to trigger again immediately after we re-enable the interrupt. Fixes: 1229b33973c7 ("ice: Add low latency Tx timestamp read") Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> --- drivers/net/ethernet/intel/ice/ice_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)