| Message ID | 20260519155108.4092518-2-va@nvidia.com |
|---|---|
| State | New |
| Headers | show |
| Series | spi: tegra210-quad: Improve interrupt handling for loaded systems | expand |
On 19/05/2026 16:51, Vishwaroop A wrote: > Threaded IRQ handlers suffer from scheduler latency on heavily loaded > systems, causing false transfer timeouts. Convert to hard IRQ handler > that schedules work on a high-priority unbound workqueue. > > The hard IRQ handler verifies the interrupt, caches FIFO status, > clears and masks interrupts, then schedules bottom-half processing. > The workqueue handler runs in process context (can sleep for DMA) > and can execute on any CPU, avoiding CPU0 bottlenecks. > > Signed-off-by: Vishwaroop A <va@nvidia.com> > --- > drivers/spi/spi-tegra210-quad.c | 128 +++++++++++++++++++++----------- > 1 file changed, 84 insertions(+), 44 deletions(-) > > diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c > index db28dd556484..17d0b511af1d 100644 > --- a/drivers/spi/spi-tegra210-quad.c > +++ b/drivers/spi/spi-tegra210-quad.c > @@ -191,6 +191,8 @@ struct tegra_qspi { > void __iomem *base; > phys_addr_t phys; > unsigned int irq; > + struct work_struct irq_work; > + struct workqueue_struct *wq; > > u32 cur_speed; > unsigned int cur_pos; > @@ -1225,9 +1227,9 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, > > if (ret == 0) { > /* > - * Check if hardware completed the transfer > - * even though interrupt was lost or delayed. > - * If so, process the completion and continue. > + * Check if hardware completed the transfer even though > + * workqueue was delayed. If so, process completion and > + * continue. > */ > ret = tegra_qspi_handle_timeout(tqspi); > if (ret < 0) { > @@ -1344,8 +1346,8 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi, > if (ret == 0) { > /* > * Check if hardware completed the transfer even though > - * interrupt was lost or delayed. If so, process the > - * completion and continue. > + * workqueue was delayed. If so, process completion and > + * continue. > */ > ret = tegra_qspi_handle_timeout(tqspi); > if (ret < 0) { > @@ -1574,46 +1576,40 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi) > return IRQ_HANDLED; > } > > -static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data) > +/** > + * tegra_qspi_work_handler - Workqueue handler for interrupt bottom-half > + * @work: work_struct embedded in tegra_qspi > + * > + * Runs in process context and can sleep (needed for DMA completion waits). > + * Can run on any available CPU, avoiding CPU0 bottleneck that occurs with > + * threaded IRQ handlers which are pinned to the IRQ's CPU. > + * > + * The hard IRQ handler has already: > + * - Verified this is our interrupt (QSPI_RDY was set) > + * - Cached FIFO status in tqspi->status_reg > + * - Parsed tx_status / rx_status from FIFO status > + * - Masked further interrupts > + */ > +static void tegra_qspi_work_handler(struct work_struct *work) > { > - struct tegra_qspi *tqspi = context_data; > + struct tegra_qspi *tqspi = container_of(work, struct tegra_qspi, irq_work); > unsigned long flags; > - u32 status; > > - /* > - * Read transfer status to check if interrupt was triggered by transfer > - * completion > - */ > - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > + spin_lock_irqsave(&tqspi->lock, flags); > > /* > - * Occasionally the IRQ thread takes a long time to wake up (usually > - * when the CPU that it's running on is excessively busy) and we have > - * already reached the timeout before and cleaned up the timed out > - * transfer. Avoid any processing in that case and bail out early. > - * > - * If no transfer is in progress, check if this was a real interrupt > - * that the timeout handler already processed, or a spurious one. > + * Check if timeout handler already processed this transfer. > + * Can happen if work was delayed and timeout fired first. If > + * so, we must unmask interrupts before returning, otherwise > + * they remain masked from the hard IRQ handler and the next > + * transfer will timeout. > */ > - spin_lock_irqsave(&tqspi->lock, flags); > if (!tqspi->curr_xfer) { > spin_unlock_irqrestore(&tqspi->lock, flags); > - /* Spurious interrupt - transfer not ready */ > - if (!(status & QSPI_RDY)) > - return IRQ_NONE; > - /* Real interrupt, already handled by timeout path */ > - return IRQ_HANDLED; > + tegra_qspi_unmask_irq(tqspi); > + return; > } > > - tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); > - > - if (tqspi->cur_direction & DATA_DIR_TX) > - tqspi->tx_status = tqspi->status_reg & (QSPI_TX_FIFO_UNF | QSPI_TX_FIFO_OVF); > - > - if (tqspi->cur_direction & DATA_DIR_RX) > - tqspi->rx_status = tqspi->status_reg & (QSPI_RX_FIFO_OVF | QSPI_RX_FIFO_UNF); > - > - tegra_qspi_mask_clear_irq(tqspi); > spin_unlock_irqrestore(&tqspi->lock, flags); > > /* > @@ -1623,9 +1619,46 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data) > * cannot be done while holding spinlock. > */ > if (!tqspi->is_curr_dma_xfer) > - return handle_cpu_based_xfer(tqspi); > + handle_cpu_based_xfer(tqspi); > + else > + handle_dma_based_xfer(tqspi); > +} > + > +/** > + * tegra_qspi_isr - Hard IRQ handler > + * @irq: IRQ number > + * @context_data: QSPI controller instance > + * > + * Runs in hard IRQ context with minimal latency. Cannot sleep. > + * > + * Return: IRQ_NONE if not our interrupt, IRQ_HANDLED if handled > + */ > +static irqreturn_t tegra_qspi_isr(int irq, void *context_data) > +{ > + struct tegra_qspi *tqspi = context_data; > + u32 status; > + > + status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > + if (!(status & QSPI_RDY)) > + return IRQ_NONE; > + > + spin_lock(&tqspi->lock); > + tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); > + tegra_qspi_mask_clear_irq(tqspi); > > - return handle_dma_based_xfer(tqspi); > + if (tqspi->cur_direction & DATA_DIR_TX) > + tqspi->tx_status = tqspi->status_reg & > + (QSPI_TX_FIFO_UNF | QSPI_TX_FIFO_OVF); > + > + if (tqspi->cur_direction & DATA_DIR_RX) > + tqspi->rx_status = tqspi->status_reg & > + (QSPI_RX_FIFO_OVF | QSPI_RX_FIFO_UNF); > + > + spin_unlock(&tqspi->lock); > + > + queue_work(tqspi->wq, &tqspi->irq_work); > + > + return IRQ_HANDLED; > } > > static struct tegra_qspi_soc_data tegra210_qspi_soc_data = { > @@ -1793,9 +1826,19 @@ static int tegra_qspi_probe(struct platform_device *pdev) > > pm_runtime_put_autosuspend(&pdev->dev); > > - ret = request_threaded_irq(tqspi->irq, NULL, > - tegra_qspi_isr_thread, IRQF_ONESHOT, > - dev_name(&pdev->dev), tqspi); > + tqspi->wq = devm_alloc_workqueue(&pdev->dev, "%s", > + WQ_HIGHPRI | WQ_UNBOUND, 0, > + dev_name(&pdev->dev)); > + if (!tqspi->wq) { > + dev_err(&pdev->dev, "failed to allocate workqueue\n"); > + ret = -ENOMEM; > + goto exit_pm_disable; > + } > + > + INIT_WORK(&tqspi->irq_work, tegra_qspi_work_handler); > + > + ret = devm_request_irq(&pdev->dev, tqspi->irq, tegra_qspi_isr, > + IRQF_SHARED, dev_name(&pdev->dev), tqspi); > if (ret < 0) { > dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", tqspi->irq, ret); > goto exit_pm_disable; > @@ -1804,13 +1847,11 @@ static int tegra_qspi_probe(struct platform_device *pdev) > ret = spi_register_controller(host); > if (ret < 0) { > dev_err(&pdev->dev, "failed to register host: %d\n", ret); > - goto exit_free_irq; > + goto exit_pm_disable; > } > > return 0; > > -exit_free_irq: > - free_irq(qspi_irq, tqspi); > exit_pm_disable: > pm_runtime_dont_use_autosuspend(&pdev->dev); > pm_runtime_force_suspend(&pdev->dev); > @@ -1824,7 +1865,6 @@ static void tegra_qspi_remove(struct platform_device *pdev) > struct tegra_qspi *tqspi = spi_controller_get_devdata(host); > > spi_unregister_controller(host); > - free_irq(tqspi->irq, tqspi); > pm_runtime_dont_use_autosuspend(&pdev->dev); > pm_runtime_force_suspend(&pdev->dev); > tegra_qspi_deinit_dma(tqspi); Looks like you dropped the ... flush_workqueue(tqspi->wq); Don't we still need this? Jon
On Tue, May 19, 2026 at 03:51:06PM +0000, Vishwaroop A wrote: > Threaded IRQ handlers suffer from scheduler latency on heavily loaded > systems, causing false transfer timeouts. Convert to hard IRQ handler > that schedules work on a high-priority unbound workqueue. > > The hard IRQ handler verifies the interrupt, caches FIFO status, > clears and masks interrupts, then schedules bottom-half processing. > The workqueue handler runs in process context (can sleep for DMA) > and can execute on any CPU, avoiding CPU0 bottlenecks. Thanks for doing this work! > + status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > + if (!(status & QSPI_RDY)) > + return IRQ_NONE; > + > + spin_lock(&tqspi->lock); Can you help me to understand what the tqspi->lock protects? I am still a bit confused by this lock, but at the first glance, I am wondering if you don't need to have the lock while reading the status. Thanks --breno
On Wed, May 20, 2026 at 10:22:53AM +0100, Jon Hunter wrote: > > > On 19/05/2026 16:51, Vishwaroop A wrote: > > Threaded IRQ handlers suffer from scheduler latency on heavily loaded > > systems, causing false transfer timeouts. Convert to hard IRQ handler > > that schedules work on a high-priority unbound workqueue. Please delete unneeded context from mails when replying. Doing this makes it much easier to find your reply in the message, helping ensure it won't be missed by people scrolling through the irrelevant quoted material.
On Wed, May 20, 2026 at 08:25:23AM -0700, Breno Leitao wrote: > > + status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > > + if (!(status & QSPI_RDY)) > > + return IRQ_NONE; > > + > > + spin_lock(&tqspi->lock); > > Can you help me to understand what the tqspi->lock protects? I am still > a bit confused by this lock, but at the first glance, I am wondering if > you don't need to have the lock while reading the status. Good question. The QSPI_TRANS_STATUS read before the lock is a hardware register read (MMIO) used as the IRQF_SHARED ownership check -- we read QSPI_RDY to determine if this interrupt belongs to us. If not, we return IRQ_NONE immediately. Taking the lock before this check would serialize against every unrelated interrupt on the shared line for no benefit, since we're reading hardware state that no software path can modify concurrently. The only CPU-side write to QSPI_TRANS_STATUS is the write-1-to-clear inside tegra_qspi_mask_clear_irq(), which happens under the lock at line 1655, after we've already confirmed the interrupt is ours. The register is also cleared at the start of each new message in tegra_qspi_setup_transfer_one() -> tegra_qspi_mask_clear_irq(), but that runs before the transfer is started and interrupts are unmasked, so there's no overlap with the ISR. The lock itself protects the software state that is shared between the ISR, the workqueue bottom-half, and the timeout handler running in the transfer thread. Specifically, curr_xfer is read and NULLed by handle_cpu_based_xfer/handle_dma_based_xfer under the lock, and checked by the timeout handler under the lock, so concurrent access is serialized. The ISR writes status_reg, tx_status, and rx_status under the lock before scheduling the workqueue; the workqueue and transfer thread read them after the scheduling barrier or completion respectively, so the lock provides the write-side ordering. The trans_status caching (tqspi->trans_status = status) before the lock is safe because the ISR is the sole writer in interrupt context. The only reader is handle_timeout, which reads it under spin_lock_irqsave after wait_for_completion_timeout expires. The setup path resets it to zero under the lock before starting each transfer, well before the ISR can fire. Vishwaroop
Hello Vishwaroop, On Wed, May 20, 2026 at 07:22:10PM +0000, Vishwaroop A wrote: > On Wed, May 20, 2026 at 08:25:23AM -0700, Breno Leitao wrote: > > > + status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); > > > + if (!(status & QSPI_RDY)) > > > + return IRQ_NONE; > > > + > > > + spin_lock(&tqspi->lock); > > > > Can you help me to understand what the tqspi->lock protects? I am still > > a bit confused by this lock, but at the first glance, I am wondering if > > you don't need to have the lock while reading the status. > > The lock itself protects the software state that is shared between > the ISR, the workqueue bottom-half, and the timeout handler running > in the transfer thread. Thanks for the reply! I got the impression that tqspi->lock is also serializing hardware accesses — the hard IRQ holds it across the readl(QSPI_FIFO_STATUS) and across tegra_qspi_mask_clear_irq(), which does an RMW on QSPI_INTR_MASK and a W1C on QSPI_TRANS_STATUS; the setup, start, error and bottom-half paths similarly hold it across MMIO. If the lock is really only guarding curr_xfer / status_reg / tx_status / rx_status, why hold it across those register accesses at all? Thanks, --breno
On Thu, May 21, 2026 at 08:04:08AM -0700, Breno Leitao wrote: > If the lock is really only guarding curr_xfer / status_reg / tx_status > / rx_status, why hold it across those register accesses at all? Thanks for pushing on this, it's a fair question and worth spelling out more carefully. The lock spans the register accesses because the FIFO_STATUS read and the tx_status / rx_status / status_reg writes form one update -- the software fields are populated directly from that read. The bottom-half (handle_cpu_based_xfer at line 1467) and the timeout handler (which calls into handle_cpu_based_xfer at line 1098 after releasing its own acquisition at line 1092) both read tx_status and rx_status under the same lock. Splitting the lock so the read is outside would let those callers observe partially updated software state. The mask_clear_irq call is inside the same critical section because it is the W1C that retires the interrupt source the ISR just consumed. Pairing it with the FIFO_STATUS read and the software writes ensures that any subsequent acquirer of the lock sees either "interrupt is pending and software fields are stale" or "interrupt is retired and software fields are current" -- never the middle state. handle_cpu_based_xfer holds the lock across its FIFO drain and sub-transfer restart for the same reason: the restart calls tegra_qspi_unmask_irq, which can re-trigger the ISR before the handler finishes setting curr_xfer = NULL at line 1497. Releasing the lock only after that assignment lets the next ISR's critical section start from a consistent state. TRANS_STATUS is read outside the lock because that read precedes the sequence -- it is the IRQF_SHARED ownership check, a single readl with no associated software state and no RMW. The two CPU-side paths that clear it are tegra_qspi_mask_clear_irq inside the ISR's locked section (line 1655) and tegra_qspi_mask_clear_irq from setup_transfer_one (line 862), which runs before the controller unmasks its interrupts, so it cannot overlap with an ISR. Vishwaroop
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index db28dd556484..17d0b511af1d 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -191,6 +191,8 @@ struct tegra_qspi { void __iomem *base; phys_addr_t phys; unsigned int irq; + struct work_struct irq_work; + struct workqueue_struct *wq; u32 cur_speed; unsigned int cur_pos; @@ -1225,9 +1227,9 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, if (ret == 0) { /* - * Check if hardware completed the transfer - * even though interrupt was lost or delayed. - * If so, process the completion and continue. + * Check if hardware completed the transfer even though + * workqueue was delayed. If so, process completion and + * continue. */ ret = tegra_qspi_handle_timeout(tqspi); if (ret < 0) { @@ -1344,8 +1346,8 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi, if (ret == 0) { /* * Check if hardware completed the transfer even though - * interrupt was lost or delayed. If so, process the - * completion and continue. + * workqueue was delayed. If so, process completion and + * continue. */ ret = tegra_qspi_handle_timeout(tqspi); if (ret < 0) { @@ -1574,46 +1576,40 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi) return IRQ_HANDLED; } -static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data) +/** + * tegra_qspi_work_handler - Workqueue handler for interrupt bottom-half + * @work: work_struct embedded in tegra_qspi + * + * Runs in process context and can sleep (needed for DMA completion waits). + * Can run on any available CPU, avoiding CPU0 bottleneck that occurs with + * threaded IRQ handlers which are pinned to the IRQ's CPU. + * + * The hard IRQ handler has already: + * - Verified this is our interrupt (QSPI_RDY was set) + * - Cached FIFO status in tqspi->status_reg + * - Parsed tx_status / rx_status from FIFO status + * - Masked further interrupts + */ +static void tegra_qspi_work_handler(struct work_struct *work) { - struct tegra_qspi *tqspi = context_data; + struct tegra_qspi *tqspi = container_of(work, struct tegra_qspi, irq_work); unsigned long flags; - u32 status; - /* - * Read transfer status to check if interrupt was triggered by transfer - * completion - */ - status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); + spin_lock_irqsave(&tqspi->lock, flags); /* - * Occasionally the IRQ thread takes a long time to wake up (usually - * when the CPU that it's running on is excessively busy) and we have - * already reached the timeout before and cleaned up the timed out - * transfer. Avoid any processing in that case and bail out early. - * - * If no transfer is in progress, check if this was a real interrupt - * that the timeout handler already processed, or a spurious one. + * Check if timeout handler already processed this transfer. + * Can happen if work was delayed and timeout fired first. If + * so, we must unmask interrupts before returning, otherwise + * they remain masked from the hard IRQ handler and the next + * transfer will timeout. */ - spin_lock_irqsave(&tqspi->lock, flags); if (!tqspi->curr_xfer) { spin_unlock_irqrestore(&tqspi->lock, flags); - /* Spurious interrupt - transfer not ready */ - if (!(status & QSPI_RDY)) - return IRQ_NONE; - /* Real interrupt, already handled by timeout path */ - return IRQ_HANDLED; + tegra_qspi_unmask_irq(tqspi); + return; } - tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); - - if (tqspi->cur_direction & DATA_DIR_TX) - tqspi->tx_status = tqspi->status_reg & (QSPI_TX_FIFO_UNF | QSPI_TX_FIFO_OVF); - - if (tqspi->cur_direction & DATA_DIR_RX) - tqspi->rx_status = tqspi->status_reg & (QSPI_RX_FIFO_OVF | QSPI_RX_FIFO_UNF); - - tegra_qspi_mask_clear_irq(tqspi); spin_unlock_irqrestore(&tqspi->lock, flags); /* @@ -1623,9 +1619,46 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data) * cannot be done while holding spinlock. */ if (!tqspi->is_curr_dma_xfer) - return handle_cpu_based_xfer(tqspi); + handle_cpu_based_xfer(tqspi); + else + handle_dma_based_xfer(tqspi); +} + +/** + * tegra_qspi_isr - Hard IRQ handler + * @irq: IRQ number + * @context_data: QSPI controller instance + * + * Runs in hard IRQ context with minimal latency. Cannot sleep. + * + * Return: IRQ_NONE if not our interrupt, IRQ_HANDLED if handled + */ +static irqreturn_t tegra_qspi_isr(int irq, void *context_data) +{ + struct tegra_qspi *tqspi = context_data; + u32 status; + + status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS); + if (!(status & QSPI_RDY)) + return IRQ_NONE; + + spin_lock(&tqspi->lock); + tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS); + tegra_qspi_mask_clear_irq(tqspi); - return handle_dma_based_xfer(tqspi); + if (tqspi->cur_direction & DATA_DIR_TX) + tqspi->tx_status = tqspi->status_reg & + (QSPI_TX_FIFO_UNF | QSPI_TX_FIFO_OVF); + + if (tqspi->cur_direction & DATA_DIR_RX) + tqspi->rx_status = tqspi->status_reg & + (QSPI_RX_FIFO_OVF | QSPI_RX_FIFO_UNF); + + spin_unlock(&tqspi->lock); + + queue_work(tqspi->wq, &tqspi->irq_work); + + return IRQ_HANDLED; } static struct tegra_qspi_soc_data tegra210_qspi_soc_data = { @@ -1793,9 +1826,19 @@ static int tegra_qspi_probe(struct platform_device *pdev) pm_runtime_put_autosuspend(&pdev->dev); - ret = request_threaded_irq(tqspi->irq, NULL, - tegra_qspi_isr_thread, IRQF_ONESHOT, - dev_name(&pdev->dev), tqspi); + tqspi->wq = devm_alloc_workqueue(&pdev->dev, "%s", + WQ_HIGHPRI | WQ_UNBOUND, 0, + dev_name(&pdev->dev)); + if (!tqspi->wq) { + dev_err(&pdev->dev, "failed to allocate workqueue\n"); + ret = -ENOMEM; + goto exit_pm_disable; + } + + INIT_WORK(&tqspi->irq_work, tegra_qspi_work_handler); + + ret = devm_request_irq(&pdev->dev, tqspi->irq, tegra_qspi_isr, + IRQF_SHARED, dev_name(&pdev->dev), tqspi); if (ret < 0) { dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", tqspi->irq, ret); goto exit_pm_disable; @@ -1804,13 +1847,11 @@ static int tegra_qspi_probe(struct platform_device *pdev) ret = spi_register_controller(host); if (ret < 0) { dev_err(&pdev->dev, "failed to register host: %d\n", ret); - goto exit_free_irq; + goto exit_pm_disable; } return 0; -exit_free_irq: - free_irq(qspi_irq, tqspi); exit_pm_disable: pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_force_suspend(&pdev->dev); @@ -1824,7 +1865,6 @@ static void tegra_qspi_remove(struct platform_device *pdev) struct tegra_qspi *tqspi = spi_controller_get_devdata(host); spi_unregister_controller(host); - free_irq(tqspi->irq, tqspi); pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_force_suspend(&pdev->dev); tegra_qspi_deinit_dma(tqspi);
Threaded IRQ handlers suffer from scheduler latency on heavily loaded systems, causing false transfer timeouts. Convert to hard IRQ handler that schedules work on a high-priority unbound workqueue. The hard IRQ handler verifies the interrupt, caches FIFO status, clears and masks interrupts, then schedules bottom-half processing. The workqueue handler runs in process context (can sleep for DMA) and can execute on any CPU, avoiding CPU0 bottlenecks. Signed-off-by: Vishwaroop A <va@nvidia.com> --- drivers/spi/spi-tegra210-quad.c | 128 +++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 44 deletions(-)