diff mbox

[03/21] mtd: nand: pxa3xx: Use a completion to signal device ready

Message ID 1379606505-2529-4-git-send-email-ezequiel.garcia@free-electrons.com
State Superseded
Headers show

Commit Message

Ezequiel Garcia Sept. 19, 2013, 4:01 p.m. UTC
Apparently, the expected behavior of the waitfunc() NAND chip call
is to wait for the device to be READY (this is a standard chip line).
However, the current implementation does almost nothing, which opens
a possibility to issue a command to a non-ready device.

Fix this by adding a new completion to wait for the ready event to arrive.

Because the "is ready" flag is cleared from the controller status
register, it's needed to store that state in the driver, and because the
field is accesed from an interruption, the field needs to be of an
atomic type.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
---
 drivers/mtd/nand/pxa3xx_nand.c | 45 +++++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 14 deletions(-)

Comments

Brian Norris Oct. 2, 2013, 9:56 p.m. UTC | #1
On Thu, Sep 19, 2013 at 01:01:27PM -0300, Ezequiel Garcia wrote:
> Apparently, the expected behavior of the waitfunc() NAND chip call
> is to wait for the device to be READY (this is a standard chip line).
> However, the current implementation does almost nothing, which opens
> a possibility to issue a command to a non-ready device.
> 
> Fix this by adding a new completion to wait for the ready event to arrive.
> 
> Because the "is ready" flag is cleared from the controller status
> register, it's needed to store that state in the driver, and because the
> field is accesed from an interruption, the field needs to be of an
> atomic type.

Does it really need to be an atomic type? AIUI, you can hand off exclusive
control to the hardware when you begin a command. So the rest of the driver would be
inactive when the IRQ handler is setting the ready flag. And similarly,
the hardware should be inactive (i.e., no interrupts) when you are
modifying the ready flag in the driver (i.e., in cmdfunc).

Or perhaps I'm misunderstanding something.

> Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
> ---
>  drivers/mtd/nand/pxa3xx_nand.c | 45 +++++++++++++++++++++++++++++-------------
>  1 file changed, 31 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
> index a8d2ea7..fba91c2 100644
> --- a/drivers/mtd/nand/pxa3xx_nand.c
> +++ b/drivers/mtd/nand/pxa3xx_nand.c
> @@ -35,6 +35,7 @@
>  
>  #include <linux/platform_data/mtd-nand-pxa3xx.h>
>  
> +#define NAND_DEV_READY_TIMEOUT  50
>  #define	CHIP_DELAY_TIMEOUT	(2 * HZ/10)
>  #define NAND_STOP_DELAY		(2 * HZ/50)
>  #define PAGE_CHUNK_SIZE		(2048)
> @@ -167,7 +168,7 @@ struct pxa3xx_nand_info {
>  	struct clk		*clk;
>  	void __iomem		*mmio_base;
>  	unsigned long		mmio_phys;
> -	struct completion	cmd_complete;
> +	struct completion	cmd_complete, dev_ready;

Do you really need two completion structures? Does 'device ready' always
follow 'command complete', so that you can just use 'device ready' all
the time? And if so, does it make sense to just use dev_ready?

>  
>  	unsigned int 		buf_start;
>  	unsigned int		buf_count;

Brian
Ezequiel Garcia Oct. 4, 2013, 6:54 p.m. UTC | #2
Brian,

First of all: thanks for all your nice feedback.
I'm going through all of it right now, and just started
by this one.

On Wed, Oct 02, 2013 at 02:56:28PM -0700, Brian Norris wrote:
> On Thu, Sep 19, 2013 at 01:01:27PM -0300, Ezequiel Garcia wrote:
> > Apparently, the expected behavior of the waitfunc() NAND chip call
> > is to wait for the device to be READY (this is a standard chip line).
> > However, the current implementation does almost nothing, which opens
> > a possibility to issue a command to a non-ready device.
> > 
> > Fix this by adding a new completion to wait for the ready event to arrive.
> > 
> > Because the "is ready" flag is cleared from the controller status
> > register, it's needed to store that state in the driver, and because the
> > field is accesed from an interruption, the field needs to be of an
> > atomic type.
> 
> Does it really need to be an atomic type? AIUI, you can hand off exclusive
> control to the hardware when you begin a command. So the rest of the driver would be
> inactive when the IRQ handler is setting the ready flag. And similarly,
> the hardware should be inactive (i.e., no interrupts) when you are
> modifying the ready flag in the driver (i.e., in cmdfunc).
> 

Hm... I think that's not the case, and that's exactly why I had to add
an atomic type. See below.

> Or perhaps I'm misunderstanding something.
> 
> > Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
> > ---
> >  drivers/mtd/nand/pxa3xx_nand.c | 45 +++++++++++++++++++++++++++++-------------
> >  1 file changed, 31 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
> > index a8d2ea7..fba91c2 100644
> > --- a/drivers/mtd/nand/pxa3xx_nand.c
> > +++ b/drivers/mtd/nand/pxa3xx_nand.c
> > @@ -35,6 +35,7 @@
> >  
> >  #include <linux/platform_data/mtd-nand-pxa3xx.h>
> >  
> > +#define NAND_DEV_READY_TIMEOUT  50
> >  #define	CHIP_DELAY_TIMEOUT	(2 * HZ/10)
> >  #define NAND_STOP_DELAY		(2 * HZ/50)
> >  #define PAGE_CHUNK_SIZE		(2048)
> > @@ -167,7 +168,7 @@ struct pxa3xx_nand_info {
> >  	struct clk		*clk;
> >  	void __iomem		*mmio_base;
> >  	unsigned long		mmio_phys;
> > -	struct completion	cmd_complete;
> > +	struct completion	cmd_complete, dev_ready;
> 
> Do you really need two completion structures? Does 'device ready' always
> follow 'command complete', so that you can just use 'device ready' all
> the time? And if so, does it make sense to just use dev_ready?
> 
> >  
> >  	unsigned int 		buf_start;
> >  	unsigned int		buf_count;

The idea here is to handle the following situation. After a command is
completed (which also applies to the 'chunked' page command we want to
support later in the patchset) the controller sets a 'command done' flag
in the ISR.

The interruption handler calls the command done completion which causes
the cmdfunc to finish waiting and exit (or continue executing commands
in the 'chunked' case).

Notice that this does *not* guarantee the controller is ready since the
'ready' flag is independent of the 'command complete' flag and the IRQ can
arrive at any later point.

Let's suppose the cmdfunc() has exited, and the NAND base code is going to
continue the operation. In the PAGEPROG case, the NAND base calls waitfunc()
before continuing the operation.

At that point, we **need** to wait for the 'ready' flag because otherwise
the NAND base could issue another PAGEPROG command, while the controller
is not really ready to receive it.

However, if we enter the waitfunc() we want to read the info->is_ready flag
to check the completion has been initialized and a wait is suitable.
Otherwise, some commands that are not issued (with no wait completion
initialized) will stall at that waitfunc().

While at the waitfunc() the interruption handler might get called for
the 'ready' flag and so the info->is_ready variable gets access from two
different execution contexts.

Given the above behavior, I thought the only way of dealing with this
was to add a new completion and use atomic type for the is_ready flag.

Is the above clear? Do you think the implementation is correct for the
behavior?
Ezequiel Garcia Oct. 16, 2013, 8:23 p.m. UTC | #3
On Fri, Oct 04, 2013 at 03:54:54PM -0300, Ezequiel Garcia wrote:
> Brian,
> 
> First of all: thanks for all your nice feedback.
> I'm going through all of it right now, and just started
> by this one.
> 
> On Wed, Oct 02, 2013 at 02:56:28PM -0700, Brian Norris wrote:
> > On Thu, Sep 19, 2013 at 01:01:27PM -0300, Ezequiel Garcia wrote:
> > > Apparently, the expected behavior of the waitfunc() NAND chip call
> > > is to wait for the device to be READY (this is a standard chip line).
> > > However, the current implementation does almost nothing, which opens
> > > a possibility to issue a command to a non-ready device.
> > > 
> > > Fix this by adding a new completion to wait for the ready event to arrive.
> > > 
> > > Because the "is ready" flag is cleared from the controller status
> > > register, it's needed to store that state in the driver, and because the
> > > field is accesed from an interruption, the field needs to be of an
> > > atomic type.
> > 
> > Does it really need to be an atomic type? AIUI, you can hand off exclusive
> > control to the hardware when you begin a command. So the rest of the driver would be
> > inactive when the IRQ handler is setting the ready flag. And similarly,
> > the hardware should be inactive (i.e., no interrupts) when you are
> > modifying the ready flag in the driver (i.e., in cmdfunc).
> > 
> 
> Hm... I think that's not the case, and that's exactly why I had to add
> an atomic type. See below.
> 
> > Or perhaps I'm misunderstanding something.
> > 
> > > Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
> > > ---
> > >  drivers/mtd/nand/pxa3xx_nand.c | 45 +++++++++++++++++++++++++++++-------------
> > >  1 file changed, 31 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
> > > index a8d2ea7..fba91c2 100644
> > > --- a/drivers/mtd/nand/pxa3xx_nand.c
> > > +++ b/drivers/mtd/nand/pxa3xx_nand.c
> > > @@ -35,6 +35,7 @@
> > >  
> > >  #include <linux/platform_data/mtd-nand-pxa3xx.h>
> > >  
> > > +#define NAND_DEV_READY_TIMEOUT  50
> > >  #define	CHIP_DELAY_TIMEOUT	(2 * HZ/10)
> > >  #define NAND_STOP_DELAY		(2 * HZ/50)
> > >  #define PAGE_CHUNK_SIZE		(2048)
> > > @@ -167,7 +168,7 @@ struct pxa3xx_nand_info {
> > >  	struct clk		*clk;
> > >  	void __iomem		*mmio_base;
> > >  	unsigned long		mmio_phys;
> > > -	struct completion	cmd_complete;
> > > +	struct completion	cmd_complete, dev_ready;
> > 
> > Do you really need two completion structures? Does 'device ready' always
> > follow 'command complete', so that you can just use 'device ready' all
> > the time? And if so, does it make sense to just use dev_ready?
> > 
> > >  
> > >  	unsigned int 		buf_start;
> > >  	unsigned int		buf_count;
> 
> The idea here is to handle the following situation. After a command is
> completed (which also applies to the 'chunked' page command we want to
> support later in the patchset) the controller sets a 'command done' flag
> in the ISR.
> 
> The interruption handler calls the command done completion which causes
> the cmdfunc to finish waiting and exit (or continue executing commands
> in the 'chunked' case).
> 
> Notice that this does *not* guarantee the controller is ready since the
> 'ready' flag is independent of the 'command complete' flag and the IRQ can
> arrive at any later point.
> 
> Let's suppose the cmdfunc() has exited, and the NAND base code is going to
> continue the operation. In the PAGEPROG case, the NAND base calls waitfunc()
> before continuing the operation.
> 
> At that point, we **need** to wait for the 'ready' flag because otherwise
> the NAND base could issue another PAGEPROG command, while the controller
> is not really ready to receive it.
> 
> However, if we enter the waitfunc() we want to read the info->is_ready flag
> to check the completion has been initialized and a wait is suitable.
> Otherwise, some commands that are not issued (with no wait completion
> initialized) will stall at that waitfunc().
> 
> While at the waitfunc() the interruption handler might get called for
> the 'ready' flag and so the info->is_ready variable gets access from two
> different execution contexts.
> 
> Given the above behavior, I thought the only way of dealing with this
> was to add a new completion and use atomic type for the is_ready flag.
> 
> Is the above clear? Do you think the implementation is correct for the
> behavior?

Brian:

I'm picking up on my NAND work, and wanted to know if you have any
comments about this patch. Otherwise, I'll just leave it as it is.

FWIW, any comments about the locking scheme is always highly appreciated
as it's a complex and important issue, so feel free to ask any questions
or suggest any improvements.

Thanks,
diff mbox

Patch

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index a8d2ea7..fba91c2 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -35,6 +35,7 @@ 
 
 #include <linux/platform_data/mtd-nand-pxa3xx.h>
 
+#define NAND_DEV_READY_TIMEOUT  50
 #define	CHIP_DELAY_TIMEOUT	(2 * HZ/10)
 #define NAND_STOP_DELAY		(2 * HZ/50)
 #define PAGE_CHUNK_SIZE		(2048)
@@ -167,7 +168,7 @@  struct pxa3xx_nand_info {
 	struct clk		*clk;
 	void __iomem		*mmio_base;
 	unsigned long		mmio_phys;
-	struct completion	cmd_complete;
+	struct completion	cmd_complete, dev_ready;
 
 	unsigned int 		buf_start;
 	unsigned int		buf_count;
@@ -197,7 +198,13 @@  struct pxa3xx_nand_info {
 	int			use_ecc;	/* use HW ECC ? */
 	int			use_dma;	/* use DMA ? */
 	int			use_spare;	/* use spare ? */
-	int			is_ready;
+
+	/*
+	 * The is_ready flag is accesed from several places,
+	 * including an interruption hander. We need an atomic
+	 * type to avoid races.
+	 */
+	atomic_t		is_ready;
 
 	unsigned int		page_size;	/* page size of attached chip */
 	unsigned int		data_size;	/* data size in FIFO */
@@ -457,7 +464,7 @@  static void start_data_dma(struct pxa3xx_nand_info *info)
 static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
 {
 	struct pxa3xx_nand_info *info = devid;
-	unsigned int status, is_completed = 0;
+	unsigned int status, is_completed = 0, is_ready = 0;
 	unsigned int ready, cmd_done;
 
 	if (info->cs == 0) {
@@ -493,8 +500,9 @@  static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
 		is_completed = 1;
 	}
 	if (status & ready) {
-		info->is_ready = 1;
+		atomic_set(&info->is_ready, 1);
 		info->state = STATE_READY;
+		is_ready = 1;
 	}
 
 	if (status & NDSR_WRCMDREQ) {
@@ -523,6 +531,8 @@  static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
 	nand_writel(info, NDSR, status);
 	if (is_completed)
 		complete(&info->cmd_complete);
+	if (is_ready)
+		complete(&info->dev_ready);
 NORMAL_IRQ_EXIT:
 	return IRQ_HANDLED;
 }
@@ -554,7 +564,6 @@  static int prepare_command_pool(struct pxa3xx_nand_info *info, int command,
 	info->use_ecc		= 0;
 	info->use_spare		= 1;
 	info->use_dma		= (use_dma) ? 1 : 0;
-	info->is_ready		= 0;
 	info->retcode		= ERR_NONE;
 	if (info->cs != 0)
 		info->ndcb0 = NDCB0_CSEL;
@@ -730,6 +739,8 @@  static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 	exec_cmd = prepare_command_pool(info, command, column, page_addr);
 	if (exec_cmd) {
 		init_completion(&info->cmd_complete);
+		init_completion(&info->dev_ready);
+		atomic_set(&info->is_ready, 0);
 		pxa3xx_nand_start(info);
 
 		ret = wait_for_completion_timeout(&info->cmd_complete,
@@ -842,21 +853,27 @@  static int pxa3xx_nand_waitfunc(struct mtd_info *mtd, struct nand_chip *this)
 {
 	struct pxa3xx_nand_host *host = mtd->priv;
 	struct pxa3xx_nand_info *info = host->info_data;
+	int ret;
+
+	/* Need to wait? */
+	if (!atomic_read(&info->is_ready)) {
+		ret = wait_for_completion_timeout(&info->dev_ready,
+				CHIP_DELAY_TIMEOUT);
+		if (!ret) {
+			dev_err(&info->pdev->dev, "Ready time out!!!\n");
+			return NAND_STATUS_FAIL;
+		}
+	}
 
 	/* pxa3xx_nand_send_command has waited for command complete */
 	if (this->state == FL_WRITING || this->state == FL_ERASING) {
 		if (info->retcode == ERR_NONE)
 			return 0;
-		else {
-			/*
-			 * any error make it return 0x01 which will tell
-			 * the caller the erase and write fail
-			 */
-			return 0x01;
-		}
+		else
+			return NAND_STATUS_FAIL;
 	}
 
-	return 0;
+	return NAND_STATUS_READY;
 }
 
 static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
@@ -1000,7 +1017,7 @@  static int pxa3xx_nand_sensing(struct pxa3xx_nand_info *info)
 		return ret;
 
 	pxa3xx_nand_cmdfunc(mtd, NAND_CMD_RESET, 0, 0);
-	if (info->is_ready)
+	if (atomic_read(&info->is_ready))
 		return 0;
 
 	return -ENODEV;