| Message ID | 20260519155108.4092518-3-va@nvidia.com |
|---|---|
| State | New |
| Headers | show |
| Series | spi: tegra210-quad: Improve interrupt handling for loaded systems | expand |
On 19/05/2026 16:51, Vishwaroop A wrote: > The threaded IRQ handler reads QSPI_TRANS_STATUS to check for transfer There is no threaded IRQ handler now. > completion, but on heavily loaded systems, the thread can be delayed > long enough for wait_for_completion_timeout() to expire first. When > the timeout handler then reads TRANS_STATUS directly from hardware, > it may see a completed transfer but race with the (now-running) IRQ > thread, leading to double completion or use-after-free on curr_xfer. > > With the conversion to hard IRQ + workqueue in the previous patch, Why not just explain this with regard to the workqueue that is now in place? If this issue is independent of having a threaded IRQ or workqueue, just explain the issue independently of either a threaded IRQ or workqueue. > this race still exists: the workqueue bottom-half can be delayed > past the timeout, and the timeout handler reading hardware directly > has no synchronization with the ISR's cached state. > > Cache QSPI_TRANS_STATUS in the ISR before clearing it, allowing the > timeout handler to check the cached value under spinlock. Also guard > against curr_xfer being NULLed by a concurrent workqueue completion. s/NULLed/cleared/ > > Signed-off-by: Vishwaroop A <va@nvidia.com> > --- > drivers/spi/spi-tegra210-quad.c | 30 +++++++++++++++++++----------- > 1 file changed, 19 insertions(+), 11 deletions(-) > > diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c > index 17d0b511af1d..72f66f2c6dab 100644 > --- a/drivers/spi/spi-tegra210-quad.c > +++ b/drivers/spi/spi-tegra210-quad.c > @@ -214,6 +214,7 @@ struct tegra_qspi { > u32 tx_status; > u32 rx_status; > u32 status_reg; > + u32 trans_status; > bool is_packed; > bool use_dma; > > @@ -854,6 +855,7 @@ static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_tran > tqspi->cur_rx_pos = 0; > tqspi->cur_tx_pos = 0; > tqspi->curr_xfer = t; > + tqspi->trans_status = 0; > spin_unlock_irqrestore(&tqspi->lock, flags); > > if (is_first_of_msg) { > @@ -1068,26 +1070,30 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi); > */ > static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi) > { > + unsigned long flags; > irqreturn_t ret; > - u32 status; > > - /* Check if hardware actually completed the transfer */ > - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > - if (!(status & QSPI_RDY)) > + spin_lock_irqsave(&tqspi->lock, flags); > + > + if (!(tqspi->trans_status & QSPI_RDY)) { > + spin_unlock_irqrestore(&tqspi->lock, flags); > return -ETIMEDOUT; > + } > > /* > - * Hardware completed but interrupt was lost/delayed. Manually > - * process the completion by calling the appropriate handler. > + * ISR or workqueue may have already completed the transfer > + * and NULLed curr_xfer between the completion timeout and now. > */ > + if (!tqspi->curr_xfer) { > + spin_unlock_irqrestore(&tqspi->lock, flags); > + return 0; > + } > + > + spin_unlock_irqrestore(&tqspi->lock, flags); > + > dev_warn_ratelimited(tqspi->dev, > "QSPI interrupt timeout, but transfer complete\n"); > > - /* Clear the transfer status */ > - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > - tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS); > - > - /* Manually trigger completion handler */ > if (!tqspi->is_curr_dma_xfer) > ret = handle_cpu_based_xfer(tqspi); > else > @@ -1642,6 +1648,8 @@ static irqreturn_t tegra_qspi_isr(int irq, void *context_data) > if (!(status & QSPI_RDY)) > return IRQ_NONE; > > + tqspi->trans_status = status; > + > spin_lock(&tqspi->lock); > tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); > tegra_qspi_mask_clear_irq(tqspi);
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 17d0b511af1d..72f66f2c6dab 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -214,6 +214,7 @@ struct tegra_qspi { u32 tx_status; u32 rx_status; u32 status_reg; + u32 trans_status; bool is_packed; bool use_dma; @@ -854,6 +855,7 @@ static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_tran tqspi->cur_rx_pos = 0; tqspi->cur_tx_pos = 0; tqspi->curr_xfer = t; + tqspi->trans_status = 0; spin_unlock_irqrestore(&tqspi->lock, flags); if (is_first_of_msg) { @@ -1068,26 +1070,30 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi); */ static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi) { + unsigned long flags; irqreturn_t ret; - u32 status; - /* Check if hardware actually completed the transfer */ - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); - if (!(status & QSPI_RDY)) + spin_lock_irqsave(&tqspi->lock, flags); + + if (!(tqspi->trans_status & QSPI_RDY)) { + spin_unlock_irqrestore(&tqspi->lock, flags); return -ETIMEDOUT; + } /* - * Hardware completed but interrupt was lost/delayed. Manually - * process the completion by calling the appropriate handler. + * ISR or workqueue may have already completed the transfer + * and NULLed curr_xfer between the completion timeout and now. */ + if (!tqspi->curr_xfer) { + spin_unlock_irqrestore(&tqspi->lock, flags); + return 0; + } + + spin_unlock_irqrestore(&tqspi->lock, flags); + dev_warn_ratelimited(tqspi->dev, "QSPI interrupt timeout, but transfer complete\n"); - /* Clear the transfer status */ - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); - tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS); - - /* Manually trigger completion handler */ if (!tqspi->is_curr_dma_xfer) ret = handle_cpu_based_xfer(tqspi); else @@ -1642,6 +1648,8 @@ static irqreturn_t tegra_qspi_isr(int irq, void *context_data) if (!(status & QSPI_RDY)) return IRQ_NONE; + tqspi->trans_status = status; + spin_lock(&tqspi->lock); tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); tegra_qspi_mask_clear_irq(tqspi);
The threaded IRQ handler reads QSPI_TRANS_STATUS to check for transfer completion, but on heavily loaded systems, the thread can be delayed long enough for wait_for_completion_timeout() to expire first. When the timeout handler then reads TRANS_STATUS directly from hardware, it may see a completed transfer but race with the (now-running) IRQ thread, leading to double completion or use-after-free on curr_xfer. With the conversion to hard IRQ + workqueue in the previous patch, this race still exists: the workqueue bottom-half can be delayed past the timeout, and the timeout handler reading hardware directly has no synchronization with the ISR's cached state. Cache QSPI_TRANS_STATUS in the ISR before clearing it, allowing the timeout handler to check the cached value under spinlock. Also guard against curr_xfer being NULLed by a concurrent workqueue completion. Signed-off-by: Vishwaroop A <va@nvidia.com> --- drivers/spi/spi-tegra210-quad.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-)