| Message ID | 20260518160739.3286438-3-va@nvidia.com |
|---|---|
| State | Changes Requested |
| Headers | show |
| Series | spi: tegra210-quad: Improve interrupt handling for loaded systems | expand |
On 18/05/2026 17:07, Vishwaroop A wrote: > On heavily loaded systems, workqueue scheduling delays can exceed the > transfer timeout even though hardware completes the transfer in > microseconds. The timeout handler cannot distinguish between a real > hardware timeout and a delayed workqueue, causing false timeout errors. Can you explain this better? Ie. exactly how does this occur? > Cache QSPI_TRANS_STATUS in the ISR before clearing it, allowing the > timeout handler to check if hardware completed (QSPI_RDY set) versus > a real timeout (QSPI_RDY not set). This prevents false timeout errors > when the hardware completes but the workqueue is delayed. The workqueue was introduced in patch 1/3. Did this problem already exist with the threaded IRQ? > > Signed-off-by: Vishwaroop A <va@nvidia.com> > --- > drivers/spi/spi-tegra210-quad.c | 42 ++++++++++++++++++++------------- > 1 file changed, 26 insertions(+), 16 deletions(-) > > diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c > index a551c7a7f6c4..6148267a51cd 100644 > --- a/drivers/spi/spi-tegra210-quad.c > +++ b/drivers/spi/spi-tegra210-quad.c > @@ -214,6 +214,7 @@ struct tegra_qspi { > u32 tx_status; > u32 rx_status; > u32 status_reg; > + u32 trans_status; > bool is_packed; > bool use_dma; > > @@ -854,6 +855,7 @@ static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_tran > tqspi->cur_rx_pos = 0; > tqspi->cur_tx_pos = 0; > tqspi->curr_xfer = t; > + tqspi->trans_status = 0; > spin_unlock_irqrestore(&tqspi->lock, flags); > > if (is_first_of_msg) { > @@ -1068,26 +1070,32 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi); > */ > static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi) > { > + unsigned long flags; > irqreturn_t ret; > - u32 status; > + u32 trans_status; This variable is only used once and so seem unnecessary to have a local variable for this. > > - /* Check if hardware actually completed the transfer */ > - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > - if (!(status & QSPI_RDY)) > + spin_lock_irqsave(&tqspi->lock, flags); > + > + trans_status = tqspi->trans_status; > + if (!(trans_status & QSPI_RDY)) { > + spin_unlock_irqrestore(&tqspi->lock, flags); > return -ETIMEDOUT; > + } > > /* > - * Hardware completed but interrupt was lost/delayed. Manually > - * process the completion by calling the appropriate handler. > + * ISR or workqueue may have already completed the transfer > + * and NULLed curr_xfer between the completion timeout and now. > */ > + if (!tqspi->curr_xfer) { > + spin_unlock_irqrestore(&tqspi->lock, flags); > + return 0; > + } > + > + spin_unlock_irqrestore(&tqspi->lock, flags); > + > dev_warn_ratelimited(tqspi->dev, > "QSPI interrupt timeout, but transfer complete\n"); > > - /* Clear the transfer status */ > - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > - tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS); > - > - /* Manually trigger completion handler */ > if (!tqspi->is_curr_dma_xfer) > ret = handle_cpu_based_xfer(tqspi); > else > @@ -1227,9 +1235,9 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, > > if (ret == 0) { > /* > - * Check if hardware completed the transfer > - * even though interrupt was lost or delayed. > - * If so, process the completion and continue. > + * Check if hardware completed the transfer even though > + * workqueue was delayed. If so, process completion and > + * continue. Shouldn't this part be part of patch 1/3 because the introduced the workqueue? > */ > ret = tegra_qspi_handle_timeout(tqspi); > if (ret < 0) { > @@ -1346,8 +1354,8 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi, > if (ret == 0) { > /* > * Check if hardware completed the transfer even though > - * interrupt was lost or delayed. If so, process the > - * completion and continue. > + * workqueue was delayed. If so, process completion and > + * continue. Same here. > */ > ret = tegra_qspi_handle_timeout(tqspi); > if (ret < 0) { > @@ -1642,6 +1650,8 @@ static irqreturn_t tegra_qspi_isr(int irq, void *context_data) > if (!(status & QSPI_RDY)) > return IRQ_NONE; > > + tqspi->trans_status = status; > + > spin_lock(&tqspi->lock); > tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); > tegra_qspi_mask_clear_irq(tqspi);
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index a551c7a7f6c4..6148267a51cd 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -214,6 +214,7 @@ struct tegra_qspi { u32 tx_status; u32 rx_status; u32 status_reg; + u32 trans_status; bool is_packed; bool use_dma; @@ -854,6 +855,7 @@ static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_tran tqspi->cur_rx_pos = 0; tqspi->cur_tx_pos = 0; tqspi->curr_xfer = t; + tqspi->trans_status = 0; spin_unlock_irqrestore(&tqspi->lock, flags); if (is_first_of_msg) { @@ -1068,26 +1070,32 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi); */ static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi) { + unsigned long flags; irqreturn_t ret; - u32 status; + u32 trans_status; - /* Check if hardware actually completed the transfer */ - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); - if (!(status & QSPI_RDY)) + spin_lock_irqsave(&tqspi->lock, flags); + + trans_status = tqspi->trans_status; + if (!(trans_status & QSPI_RDY)) { + spin_unlock_irqrestore(&tqspi->lock, flags); return -ETIMEDOUT; + } /* - * Hardware completed but interrupt was lost/delayed. Manually - * process the completion by calling the appropriate handler. + * ISR or workqueue may have already completed the transfer + * and NULLed curr_xfer between the completion timeout and now. */ + if (!tqspi->curr_xfer) { + spin_unlock_irqrestore(&tqspi->lock, flags); + return 0; + } + + spin_unlock_irqrestore(&tqspi->lock, flags); + dev_warn_ratelimited(tqspi->dev, "QSPI interrupt timeout, but transfer complete\n"); - /* Clear the transfer status */ - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); - tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS); - - /* Manually trigger completion handler */ if (!tqspi->is_curr_dma_xfer) ret = handle_cpu_based_xfer(tqspi); else @@ -1227,9 +1235,9 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, if (ret == 0) { /* - * Check if hardware completed the transfer - * even though interrupt was lost or delayed. - * If so, process the completion and continue. + * Check if hardware completed the transfer even though + * workqueue was delayed. If so, process completion and + * continue. */ ret = tegra_qspi_handle_timeout(tqspi); if (ret < 0) { @@ -1346,8 +1354,8 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi, if (ret == 0) { /* * Check if hardware completed the transfer even though - * interrupt was lost or delayed. If so, process the - * completion and continue. + * workqueue was delayed. If so, process completion and + * continue. */ ret = tegra_qspi_handle_timeout(tqspi); if (ret < 0) { @@ -1642,6 +1650,8 @@ static irqreturn_t tegra_qspi_isr(int irq, void *context_data) if (!(status & QSPI_RDY)) return IRQ_NONE; + tqspi->trans_status = status; + spin_lock(&tqspi->lock); tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); tegra_qspi_mask_clear_irq(tqspi);
On heavily loaded systems, workqueue scheduling delays can exceed the transfer timeout even though hardware completes the transfer in microseconds. The timeout handler cannot distinguish between a real hardware timeout and a delayed workqueue, causing false timeout errors. Cache QSPI_TRANS_STATUS in the ISR before clearing it, allowing the timeout handler to check if hardware completed (QSPI_RDY set) versus a real timeout (QSPI_RDY not set). This prevents false timeout errors when the hardware completes but the workqueue is delayed. Signed-off-by: Vishwaroop A <va@nvidia.com> --- drivers/spi/spi-tegra210-quad.c | 42 ++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 16 deletions(-)