[RESEND] mtd: spi-nor: Fix Cadence QSPI RCU Schedule Stall
diff mbox series

Message ID 1565909736-11379-1-git-send-email-thor.thayer@linux.intel.com
State Accepted
Delegated to: Ambarus Tudor
Headers show
Series
  • [RESEND] mtd: spi-nor: Fix Cadence QSPI RCU Schedule Stall
Related show

Commit Message

Thor Thayer Aug. 15, 2019, 10:55 p.m. UTC
From: Thor Thayer <thor.thayer@linux.intel.com>

The current Cadence QSPI driver sometimes caused a
"rcu_sched self-detected stall" while writing large files.

Stall Report:
'# mtd_debug write /dev/mtd1 0 48816464 blob.img
[ 1815.454227] rcu: INFO: rcu_sched self-detected stall on CPU
[ 1815.459789] rcu:     0-....: (2099 ticks this GP) idle=8c6/1/0x40000002
 softirq=6492/6492 fqs=935
[ 1815.468442] rcu:      (t=2100 jiffies g=8749 q=247)
	<snip> (abbreviated backtrace)
[ 1815.772086] [<c05a3ea0>] (cqspi_exec_flash_cmd) from (cqspi_read_reg)
[ 1815.786203] [<c05a5488>] (cqspi_read_reg) from (read_sr)
[ 1815.803790] [<c05a0330>] (read_sr) from
	(spi_nor_wait_till_ready_with_timeout)
[ 1815.816610] [<c05a182c>] (spi_nor_wait_till_ready_with_timeout) from
	(spi_nor_write+0x104/0x1d0)
[ 1815.836791] [<c05a1a44>] (spi_nor_write) from (part_write+0x50/0x58)
	<snip>
[ 1815.997961] cadence-qspi ff809000.spi: Flash command execution timed out.
[ 1816.004733] error -110 reading SR
file_to_flash: write, size 0x2e8e150, n 0x2e8e150
write(): Connection timed out

This was caused by a tight loop in cqspi_wait_for_bit() that polled the
register without ever sleeping. Fix it by using
readl_relaxed_poll_timeout(), which sleeps 10us between polls.

To fit within the 80-character line limit, the bool parameter "clear" is
renamed to "clr".

Fixes: 140623410536 ("mtd: spi-nor: Add driver for Cadence Quad SPI Flash Controller")
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
---
 drivers/mtd/spi-nor/cadence-quadspi.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

Comments

Ambarus Tudor Aug. 21, 2019, 7:29 a.m. UTC | #1
Miquel,

On 08/16/2019 01:55 AM, thor.thayer@linux.intel.com wrote:
> External E-Mail
> 
> 
> From: Thor Thayer <thor.thayer@linux.intel.com>
> 
> The current Cadence QSPI driver sometimes caused a
> "rcu_sched self-detected stall" while writing large files.
> 
> Stall Report:
> '# mtd_debug write /dev/mtd1 0 48816464 blob.img
> [ 1815.454227] rcu: INFO: rcu_sched self-detected stall on CPU
> [ 1815.459789] rcu:     0-....: (2099 ticks this GP) idle=8c6/1/0x40000002
>  softirq=6492/6492 fqs=935
> [ 1815.468442] rcu:      (t=2100 jiffies g=8749 q=247)
> 	<snip> (abbreviated backtrace)
> [ 1815.772086] [<c05a3ea0>] (cqspi_exec_flash_cmd) (cqspi_read_reg)
> [ 1815.786203] [<c05a5488>] (cqspi_read_reg) from (read_sr)
> [ 1815.803790] [<c05a0330>] (read_sr) from
> 	(spi_nor_wait_till_ready_with_timeout)
> [ 1815.816610] [<c05a182c>] (spi_nor_wait_till_ready_with_timeout) from
> 	(spi_nor_write+0x104/0x1d0)
> [ 1815.836791] [<c05a1a44>] (spi_nor_write) from (part_write+0x50/0x58)
> 	<snip>
> [ 1815.997961] cadence-qspi ff809000.spi: Flash command execution timed out.
> [ 1816.004733] error -110 reading SR
> file_to_flash: write, size 0x2e8e150, n 0x2e8e150
> write(): Connection timed out
> 
> This was caused by a tight loop in cqspi_wait_for_bit(). Fix by using
> readl_relaxed_poll_timeout() which sleeps 10us while polling a register.
> 
> Fit onto 80 character line by truncating the bool clear parameter
> 
> Fixes: 140623410536 ("mtd: spi-nor: Add driver for Cadence Quad SPI Flash Controller")
> Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>

Probably this is a good candidate for mtd/fixes.

Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>

> ---
>  drivers/mtd/spi-nor/cadence-quadspi.c | 19 +++++--------------
>  1 file changed, 5 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
> index 67f15a1f16fd..7bef63947b29 100644
> --- a/drivers/mtd/spi-nor/cadence-quadspi.c
> +++ b/drivers/mtd/spi-nor/cadence-quadspi.c
> @@ -13,6 +13,7 @@
>  #include <linux/errno.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
> +#include <linux/iopoll.h>
>  #include <linux/jiffies.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
> @@ -241,23 +242,13 @@ struct cqspi_driver_platdata {
>  
>  #define CQSPI_IRQ_STATUS_MASK		0x1FFFF
>  
> -static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clear)
> +static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clr)
>  {
> -	unsigned long end = jiffies + msecs_to_jiffies(CQSPI_TIMEOUT_MS);
>  	u32 val;
>  
> -	while (1) {
> -		val = readl(reg);
> -		if (clear)
> -			val = ~val;
> -		val &= mask;
> -
> -		if (val == mask)
> -			return 0;
> -
> -		if (time_after(jiffies, end))
> -			return -ETIMEDOUT;
> -	}
> +	return readl_relaxed_poll_timeout(reg, val,
> +					  (((clr ? ~val : val) & mask) == mask),
> +					  10, CQSPI_TIMEOUT_MS * 1000);
>  }
>  
>  static bool cqspi_is_idle(struct cqspi_st *cqspi)
>
Ambarus Tudor Aug. 21, 2019, 8:22 a.m. UTC | #2
On 08/16/2019 01:55 AM, thor.thayer@linux.intel.com wrote:
> External E-Mail
> 
> 
> From: Thor Thayer <thor.thayer@linux.intel.com>
> 
> The current Cadence QSPI driver sometimes caused a
> "rcu_sched self-detected stall" while writing large files.
> 
> Stall Report:
> '# mtd_debug write /dev/mtd1 0 48816464 blob.img
> [ 1815.454227] rcu: INFO: rcu_sched self-detected stall on CPU
> [ 1815.459789] rcu:     0-....: (2099 ticks this GP) idle=8c6/1/0x40000002
>  softirq=6492/6492 fqs=935
> [ 1815.468442] rcu:      (t=2100 jiffies g=8749 q=247)
> 	<snip> (abbreviated backtrace)
> [ 1815.772086] [<c05a3ea0>] (cqspi_exec_flash_cmd) (cqspi_read_reg)
> [ 1815.786203] [<c05a5488>] (cqspi_read_reg) from (read_sr)
> [ 1815.803790] [<c05a0330>] (read_sr) from
> 	(spi_nor_wait_till_ready_with_timeout)
> [ 1815.816610] [<c05a182c>] (spi_nor_wait_till_ready_with_timeout) from
> 	(spi_nor_write+0x104/0x1d0)
> [ 1815.836791] [<c05a1a44>] (spi_nor_write) from (part_write+0x50/0x58)
> 	<snip>
> [ 1815.997961] cadence-qspi ff809000.spi: Flash command execution timed out.
> [ 1816.004733] error -110 reading SR
> file_to_flash: write, size 0x2e8e150, n 0x2e8e150
> write(): Connection timed out
> 
> This was caused by a tight loop in cqspi_wait_for_bit(). Fix by using
> readl_relaxed_poll_timeout() which sleeps 10us while polling a register.
> 
> Fit onto 80 character line by truncating the bool clear parameter
> 
> Fixes: 140623410536 ("mtd: spi-nor: Add driver for Cadence Quad SPI Flash Controller")
> Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
> ---
>  drivers/mtd/spi-nor/cadence-quadspi.c | 19 +++++--------------
>  1 file changed, 5 insertions(+), 14 deletions(-)
> 


Since the bug was not introduced in the previous release and we are quite late
for mtd/fixes, I have applied this to the spi-nor/next branch of
https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git instead.

Thanks,
ta

Patch
diff mbox series

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 67f15a1f16fd..7bef63947b29 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -13,6 +13,7 @@ 
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -241,23 +242,13 @@  struct cqspi_driver_platdata {
 
 #define CQSPI_IRQ_STATUS_MASK		0x1FFFF
 
-static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clear)
+static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clr)
 {
-	unsigned long end = jiffies + msecs_to_jiffies(CQSPI_TIMEOUT_MS);
 	u32 val;
 
-	while (1) {
-		val = readl(reg);
-		if (clear)
-			val = ~val;
-		val &= mask;
-
-		if (val == mask)
-			return 0;
-
-		if (time_after(jiffies, end))
-			return -ETIMEDOUT;
-	}
+	return readl_relaxed_poll_timeout(reg, val,
+					  (((clr ? ~val : val) & mask) == mask),
+					  10, CQSPI_TIMEOUT_MS * 1000);
 }
 
 static bool cqspi_is_idle(struct cqspi_st *cqspi)