diff mbox series

mtd: cfi_cmdset_0002: Change erase functions to retry for error

Message ID TY1PR01MB095425A96233A864806D0635DC9A0@TY1PR01MB0954.jpnprd01.prod.outlook.com
State Superseded
Headers show
Series mtd: cfi_cmdset_0002: Change erase functions to retry for error | expand

Commit Message

IKEGAMI Tokunori May 8, 2018, 4:52 p.m. UTC
From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>

For the word write functions it is retried for error.
But it is not implemented to retry for the erase functions.
To make sure for the erase functions change to retry as same.

This is needed to prevent the flash erase error caused only once.
It was caused by the error case of chip_good() in the do_erase_oneblock().
Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
But the error issue behavior is not able to reproduce at this moment.
The flash controller is parallel Flash interface integrated on BCM53003.

Also the change is possible to resolve other erase error.
For example the Erase suspend issue was caused on Cypress AMD NOR flash.
  S29GL01GS / S29GL512S / S29GL256S / S29GL128S

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Boris Brezillon May 8, 2018, 5:11 p.m. UTC | #1
+Joakim

On Tue, 8 May 2018 16:52:55 +0000
IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:

> From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> 
> For the word write functions it is retried for error.
> But it is not implemented to retry for the erase functions.
> To make sure for the erase functions change to retry as same.
> 
> This is needed to prevent the flash erase error caused only once.
> It was caused by the error case of chip_good() in the do_erase_oneblock().
> Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> But the error issue behavior is not able to reproduce at this moment.

Hm, that's a problem. How can you be sure the retry approach fixes the
issue if you can't reproduce the it?

> The flash controller is parallel Flash interface integrated on BCM53003.
> 
> Also the change is possible to resolve other erase error.
> For example the Erase suspend issue was caused on Cypress AMD NOR flash.
>   S29GL01GS / S29GL512S / S29GL256S / S29GL128S

Is the Erase suspend issue related to this problem?

> 
> Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Cc: Brian Norris <computersforpeace@gmail.com>
> Cc: David Woodhouse <dwmw2@infradead.org>
> Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
> Cc: Marek Vasut <marek.vasut@gmail.com>
> Cc: Richard Weinberger <richard@nod.at>
> Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
> Cc: linux-mtd@lists.infradead.org
> ---
>  drivers/mtd/chips/cfi_cmdset_0002.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> index 56aa6b75213d..f522a856526a 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -43,6 +43,7 @@
>  #define FORCE_WORD_WRITE 0
>  
>  #define MAX_WORD_RETRIES 3
> +#define MAX_ERASE_RETRY 3
>  
>  #define SST49LF004B	        0x0060
>  #define SST49LF040B	        0x0050
> @@ -2240,6 +2241,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	unsigned long int adr;
>  	DECLARE_WAITQUEUE(wait, current);
>  	int ret = 0;
> +	int retry_cnt = 0;
>  
>  	adr = cfi->addr_unlock1;
>  
> @@ -2257,6 +2259,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	ENABLE_VPP(map);
>  	xip_disable(map, chip, adr);
>  
> + retry:
>  	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> @@ -2310,6 +2313,9 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  		map_write( map, CMD(0xF0), chip->start );
>  		/* FIXME - should have reset delay before continuing */
>  
> +		if (++retry_cnt <= MAX_ERASE_RETRY)
> +			goto retry;
> +
>  		ret = -EIO;
>  	}
>  
> @@ -2329,6 +2335,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  	unsigned long timeo = jiffies + HZ;
>  	DECLARE_WAITQUEUE(wait, current);
>  	int ret = 0;
> +	int retry_cnt = 0;
>  
>  	adr += chip->start;
>  
> @@ -2346,6 +2353,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  	ENABLE_VPP(map);
>  	xip_disable(map, chip, adr);
>  
> + retry:
>  	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> @@ -2402,6 +2410,9 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  		map_write( map, CMD(0xF0), chip->start );
>  		/* FIXME - should have reset delay before continuing */
>  
> +		if (++retry_cnt <= MAX_ERASE_RETRY)
> +			goto retry;
> +
>  		ret = -EIO;
>  	}
>
IKEGAMI Tokunori May 8, 2018, 5:35 p.m. UTC | #2
Hi Boris-san,

Thanks for your reviewing.

> > This is needed to prevent the flash erase error caused only once.
> > It was caused by the error case of chip_good() in the do_erase_oneblock().
> > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > But the error issue behavior is not able to reproduce at this moment.
> 
> Hm, that's a problem. How can you be sure the retry approach fixes the
> issue if you can't reproduce the it?

  I can reproduce the flash write error by the same reproduction method.
  And the error can be resolved by the same approach fixes as below.
    <http://lists.infradead.org/pipermail/linux-mtd/2018-May/080794.html>
  So it seems that the erase error also can be resolved by the changes.

> > The flash controller is parallel Flash interface integrated on BCM53003.
> >
> > Also the change is possible to resolve other erase error.
> > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S
> 
> Is the Erase suspend issue related to this problem?

  I am not sure about the issue behavior detail since the issue is not caused on my environment.
  The issue was mentioned by Jocke-san on the following mail.
    <http://lists.infradead.org/pipermail/linux-mtd/2018-April/080518.html>
  Also I am asking to him to try the original patch on this thread.

Regards,
Ikegami
Boris Brezillon May 8, 2018, 6:02 p.m. UTC | #3
Hi,

On Tue, 8 May 2018 17:35:48 +0000
IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:

> Hi Boris-san,
> 
> Thanks for your reviewing.
> 
> > > This is needed to prevent the flash erase error caused only once.
> > > It was caused by the error case of chip_good() in the do_erase_oneblock().
> > > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > > But the error issue behavior is not able to reproduce at this moment.  
> > 
> > Hm, that's a problem. How can you be sure the retry approach fixes the
> > issue if you can't reproduce the it?  
> 
>   I can reproduce the flash write error by the same reproduction method.
>   And the error can be resolved by the same approach fixes as below.
>     <http://lists.infradead.org/pipermail/linux-mtd/2018-May/080794.html>

Hm, even the bug fix for the write path looks suspicious. BTW, you're
only checking the last written word, so how can you be sure all other
words have been written correctly? I don't know a lot about CFI chips,
but are you sure this is not a timing issue (timings mis-configured on
the controller side)?

>   So it seems that the erase error also can be resolved by the changes.

Well, I'd be more comfortable if someone else could confirm the bug
(ideally tested with a different controller) and validate the fix.

> 
> > > The flash controller is parallel Flash interface integrated on BCM53003.
> > >
> > > Also the change is possible to resolve other erase error.
> > > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> > >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S  
> > 
> > Is the Erase suspend issue related to this problem?  
> 
>   I am not sure about the issue behavior detail since the issue is not caused on my environment.
>   The issue was mentioned by Jocke-san on the following mail.
>     <http://lists.infradead.org/pipermail/linux-mtd/2018-April/080518.html>
>   Also I am asking to him to try the original patch on this thread.

Ok, let's wait for Joakim's feedback then.

Thanks,

Boris
IKEGAMI Tokunori May 9, 2018, 12:27 a.m. UTC | #4
Hi Boris-san,

Thanks for your comments.

> >   I can reproduce the flash write error by the same reproduction method.
> >   And the error can be resolved by the same approach fixes as below.
> >
> <http://lists.infradead.org/pipermail/linux-mtd/2018-May/080794.html>
> 
> Hm, even the bug fix for the write path looks suspicious. BTW, you're
> only checking the last written word, so how can you be sure all other
> words have been written correctly?

  For the write buffer I think that it is enough to check only the last word.
  Since it is described by the data sheets to check the operation status.

> I don't know a lot about CFI chips,
> but are you sure this is not a timing issue (timings mis-configured on
> the controller side)?

  Yes I think that this is not a timing issue by mis-configuration.
  But it is still possible to be a timing issue but not mis-configured.
  The write and erase errors were caused by enabling CPU external sync mode.
  For the CPU erratum workaround the sync mode is required to be enabled.

> >   Also I am asking to him to try the original patch on this thread.
> 
> Ok, let's wait for Joakim's feedback then.

  Noted this.

Regards,
Ikegami
Joakim Tjernlund May 9, 2018, 8:18 a.m. UTC | #5
On Tue, 2018-05-08 at 19:11 +0200, Boris Brezillon wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.
> 
> 
> +Joakim
> 
> On Tue, 8 May 2018 16:52:55 +0000
> IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:
> 
> > From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> > 
> > For the word write functions it is retried for error.
> > But it is not implemented to retry for the erase functions.
> > To make sure for the erase functions change to retry as same.
> > 
> > This is needed to prevent the flash erase error caused only once.
> > It was caused by the error case of chip_good() in the do_erase_oneblock().
> > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > But the error issue behavior is not able to reproduce at this moment.
> 
> Hm, that's a problem. How can you be sure the retry approach fixes the
> issue if you can't reproduce the it?
> 
> > The flash controller is parallel Flash interface integrated on BCM53003.
> > 
> > Also the change is possible to resolve other erase error.
> > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S
> 
> Is the Erase suspend issue related to this problem?
> 

My Erase suspend issue is not related to just retrying the erase.
We have seen several times that too many Erase suspend for one block will cause 5.5.2.6 DQ5:
 Exceeded Timing Limits
This condition is not checked for, nor handled in any way.
Once this condition has occurred, a simple retry wont fix it. One must
first issue a flash reset:
  map_write( map, CMD(0xF0), chip->in_progress_block_addr); /* Reset */
which will clear this error condition and return the flash to read array more.
Now you can restart the erase again and it will probably finish the erase
this time(at least in our testing).

Testing for DQ5: Exceeded Timing Limits is not straight forward in current 0002 driver
since it requires more than just testing for toggling bits. I have not
made any progress on this due to other more pressing work.

Note: In our testing we needed over 300000 suspend/resume before hitting the DQ5
limit.
Here is my test hack so far:
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 23a893ab4264..38b52e2c6fb6 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -829,6 +829,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
                chip->oldstate = FL_ERASING;
                chip->state = FL_ERASE_SUSPENDING;
                chip->erase_suspended = 1;
+               chip->in_progress_suspends++;
                for (;;) {
                        if (chip_ready(map, adr))
                                break;
@@ -839,8 +840,33 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
                                 * there was an error (so leave the erase
                                 * routine to recover from it) or we trying to
                                 * use the erase-in-progress sector. */
+                               map_word status = map_read(map, adr);
+
+                               map_write( map, CMD(0xF0), chip->in_progress_block_addr); /* Reset */
+#if 1
+                               /* Restart erase */
+                               cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               map_write(map, cfi->sector_erase_cmd, chip->in_progress_block_addr);
+#endif
+//                             chip->oldstate = FL_READY;
+//                             chip->state = FL_READY;
+
                                put_chip(map, chip, adr);
-                               printk(KERN_ERR "MTD %s(): chip not ready after erase suspend\n", __func__);
+                               printk(KERN_ERR "MTD %s(): chip not ready after erase suspend, block_addr:0x%lx, "
+                                      "block_mask:0x%lx, adr:0x%lx, suspends:%ld, status:0x%lx \n", __func__,
+                                      chip->in_progress_block_addr, chip->in_progress_block_addr, adr,
+                                      chip->in_progress_suspends,
+                                      status.x[0]);
+
                                return -EIO;
                        }
 
@@ -2270,7 +2296,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
        chip->erase_suspended = 0;
        chip->in_progress_block_addr = adr;
        chip->in_progress_block_mask = ~(map->size - 1);
-
+       chip->in_progress_suspends = 0;
        INVALIDATE_CACHE_UDELAY(map, chip,
                                adr, map->size,
                                chip->erase_time*500);
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index 3529683f691e..8e7ba8244ced 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -86,6 +86,7 @@ struct flchip {
        unsigned int erase_suspended:1;
        unsigned long in_progress_block_addr;
        unsigned long in_progress_block_mask;
+       unsigned long in_progress_suspends;
 
        struct mutex mutex;
        wait_queue_head_t wq; /* Wait on here when we're waiting for the chip


Which printed(when error occurred):
MTD get_chip(): chip not ready after erase suspend, block_addr:0x3480000, block_mask:0x3480000, adr:0x3a7da84,
suspends:319941, status:0x28


 Jocke
IKEGAMI Tokunori May 9, 2018, 10:20 a.m. UTC | #6
Hi Jocke-san,

  Thanks for the mail.
  I have just understood about the Erase suspend issue is not related to the change.
  Also it is not able to be resolved by the chagne.

Hi Boris-san,

  So the below commit message is not correct so I will update it to remove to describe the latter part about the Erase suspend issue.

> > > Also the change is possible to resolve other erase error.
> > > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> > >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S 

Regards,
Ikegami

> -----Original Message-----
> From: Joakim Tjernlund [mailto:Joakim.Tjernlund@infinera.com]
> Sent: Wednesday, May 09, 2018 5:19 PM
> To: boris.brezillon@bootlin.com; IKEGAMI Tokunori
> Cc: linux-mtd@lists.infradead.org; PACKHAM Chris;
> cyrille.pitchen@wedev4u.fr; dwmw2@infradead.org;
> computersforpeace@gmail.com; boris.brezillon@free-electrons.com;
> marek.vasut@gmail.com; richard@nod.at
> Subject: Re: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry
> for error
> 
> On Tue, 2018-05-08 at 19:11 +0200, Boris Brezillon wrote:
> > CAUTION: This email originated from outside of the organization. Do not
> click links or open attachments unless you recognize the sender and know
> the content is safe.
> >
> >
> > +Joakim
> >
> > On Tue, 8 May 2018 16:52:55 +0000
> > IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:
> >
> > > From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> > >
> > > For the word write functions it is retried for error.
> > > But it is not implemented to retry for the erase functions.
> > > To make sure for the erase functions change to retry as same.
> > >
> > > This is needed to prevent the flash erase error caused only once.
> > > It was caused by the error case of chip_good() in the
> do_erase_oneblock().
> > > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > > But the error issue behavior is not able to reproduce at this moment.
> >
> > Hm, that's a problem. How can you be sure the retry approach fixes the
> > issue if you can't reproduce the it?
> >
> > > The flash controller is parallel Flash interface integrated on BCM53003.
> > >
> > > Also the change is possible to resolve other erase error.
> > > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> > >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S
> >
> > Is the Erase suspend issue related to this problem?
> >
> 
> My Erase suspend issue is not related to just retrying the erase.
> We have seen several times that too many Erase suspend for one block will
> cause 5.5.2.6 DQ5:
>  Exceeded Timing Limits
> This condition is not checked for, nor handled in any way.
> Once this condition has occurred, a simple retry wont fix it. One must
> first issue a flash reset:
>   map_write( map, CMD(0xF0), chip->in_progress_block_addr); /* Reset */
> which will clear this error condition and return the flash to read array
> more.
> Now you can restart the erase again and it will probably finish the erase
> this time(at least in our testing).
> 
> Testing for DQ5: Exceeded Timing Limits is not straight forward in current
> 0002 driver
> since it requires more than just testing for toggling bits. I have not
> made any progress on this due to other more pressing work.
> 
> Note: In our testing we needed over 300000 suspend/resume before hitting
> the DQ5
> limit.
> Here is my test hack so far:
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c
> b/drivers/mtd/chips/cfi_cmdset_0002.c
> index 23a893ab4264..38b52e2c6fb6 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -829,6 +829,7 @@ static int get_chip(struct map_info *map, struct flchip
> *chip, unsigned long adr
>                 chip->oldstate = FL_ERASING;
>                 chip->state = FL_ERASE_SUSPENDING;
>                 chip->erase_suspended = 1;
> +               chip->in_progress_suspends++;
>                 for (;;) {
>                         if (chip_ready(map, adr))
>                                 break;
> @@ -839,8 +840,33 @@ static int get_chip(struct map_info *map, struct flchip
> *chip, unsigned long adr
>                                  * there was an error (so leave the erase
>                                  * routine to recover from it) or we trying
> to
>                                  * use the erase-in-progress sector. */
> +                               map_word status = map_read(map, adr);
> +
> +                               map_write( map, CMD(0xF0),
> chip->in_progress_block_addr); /* Reset */
> +#if 1
> +                               /* Restart erase */
> +                               cfi_send_gen_cmd(0xAA,
> cfi->addr_unlock1, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0x55,
> cfi->addr_unlock2, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0x80,
> cfi->addr_unlock1, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0xAA,
> cfi->addr_unlock1, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0x55,
> cfi->addr_unlock2, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               map_write(map, cfi->sector_erase_cmd,
> chip->in_progress_block_addr);
> +#endif
> +//                             chip->oldstate = FL_READY;
> +//                             chip->state = FL_READY;
> +
>                                 put_chip(map, chip, adr);
> -                               printk(KERN_ERR "MTD %s(): chip not ready
> after erase suspend\n", __func__);
> +                               printk(KERN_ERR "MTD %s(): chip not ready
> after erase suspend, block_addr:0x%lx, "
> +                                      "block_mask:0x%lx, adr:0x%lx,
> suspends:%ld, status:0x%lx \n", __func__,
> +                                      chip->in_progress_block_addr,
> chip->in_progress_block_addr, adr,
> +                                      chip->in_progress_suspends,
> +                                      status.x[0]);
> +
>                                 return -EIO;
>                         }
> 
> @@ -2270,7 +2296,7 @@ static int __xipram do_erase_chip(struct map_info
> *map, struct flchip *chip)
>         chip->erase_suspended = 0;
>         chip->in_progress_block_addr = adr;
>         chip->in_progress_block_mask = ~(map->size - 1);
> -
> +       chip->in_progress_suspends = 0;
>         INVALIDATE_CACHE_UDELAY(map, chip,
>                                 adr, map->size,
>                                 chip->erase_time*500);
> diff --git a/include/linux/mtd/flashchip.h
> b/include/linux/mtd/flashchip.h
> index 3529683f691e..8e7ba8244ced 100644
> --- a/include/linux/mtd/flashchip.h
> +++ b/include/linux/mtd/flashchip.h
> @@ -86,6 +86,7 @@ struct flchip {
>         unsigned int erase_suspended:1;
>         unsigned long in_progress_block_addr;
>         unsigned long in_progress_block_mask;
> +       unsigned long in_progress_suspends;
> 
>         struct mutex mutex;
>         wait_queue_head_t wq; /* Wait on here when we're waiting for the
> chip
> 
> 
> Which printed(when error occurred):
> MTD get_chip(): chip not ready after erase suspend, block_addr:0x3480000,
> block_mask:0x3480000, adr:0x3a7da84,
> suspends:319941, status:0x28
> 
> 
>  Jocke
diff mbox series

Patch

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 56aa6b75213d..f522a856526a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -43,6 +43,7 @@ 
 #define FORCE_WORD_WRITE 0
 
 #define MAX_WORD_RETRIES 3
+#define MAX_ERASE_RETRY 3
 
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
@@ -2240,6 +2241,7 @@  static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	unsigned long int adr;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr = cfi->addr_unlock1;
 
@@ -2257,6 +2259,7 @@  static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2310,6 +2313,9 @@  static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_ERASE_RETRY)
+			goto retry;
+
 		ret = -EIO;
 	}
 
@@ -2329,6 +2335,7 @@  static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	unsigned long timeo = jiffies + HZ;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr += chip->start;
 
@@ -2346,6 +2353,7 @@  static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2402,6 +2410,9 @@  static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_ERASE_RETRY)
+			goto retry;
+
 		ret = -EIO;
 	}