diff mbox

[v2] mtd: gpmi: Deal with bitflips in erased regions regions

Message ID 1387133061-6573-2-git-send-email-eliedebrauwer@gmail.com
State New, archived
Headers show

Commit Message

Elie De Brauwer Dec. 15, 2013, 6:44 p.m. UTC
The BCH block typically used with a i.MX28 and GPMI block is only
able to correct bitflips on data actually streamed through the block.
When erasing a block the data does not stream through the BCH block
and therefore no ECC data is written to the NAND chip. This causes
gpmi_ecc_read_page to return failure as soon as a single non-1-bit is
found in an erased page. Typically causing problems at higher levels
(ubifs corrupted empty space warnings).

This patch configures the BCH block to mark a block as 'erased' if
no more than ecc_strength bitflips are found. Next HW_BCH_STATUS0:ALLONES
is used to check if the data read were all ones. If this was not
the case a slow path is entered where bitflips are counted and
corrected in software, allowing the upper layers to take proper actions.

Signed-off-by: Elie De Brauwer <eliedebrauwer@gmail.com>
Acked-by: Peter Korsgaard <peter@korsgaard.com>
---
 drivers/mtd/nand/gpmi-nand/bch-regs.h  |    2 ++
 drivers/mtd/nand/gpmi-nand/gpmi-lib.c  |   17 +++++++++++++
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c |   43 +++++++++++++++++++++++++++++---
 drivers/mtd/nand/gpmi-nand/gpmi-nand.h |    1 +
 4 files changed, 60 insertions(+), 3 deletions(-)

Comments

Huang Shijie Dec. 16, 2013, 4:30 a.m. UTC | #1
On Sun, Dec 15, 2013 at 07:44:21PM +0100, Elie De Brauwer wrote:
> The BCH block typically used with a i.MX28 and GPMI block is only
> able to correct bitflips on data actually streamed through the block.
> When erasing a block the data does not stream through the BCH block
> and therefore no ECC data is written to the NAND chip. This causes
> gpmi_ecc_read_page to return failure as soon as a single non-1-bit is
> found in an erased page. Typically causing problems at higher levels
> (ubifs corrupted empty space warnings).
> 
> This patch configures the BCH block to mark a block as 'erased' if
> no more than ecc_strength bitflips are found. Next HW_BCH_STATUS0:ALLONES
> is used to check if the data read were all ones. If this was not
> the case a slow path is entered where bitflips are counted and
> corrected in software, allowing the upper layers to take proper actions.
> 
> Signed-off-by: Elie De Brauwer <eliedebrauwer@gmail.com>
> Acked-by: Peter Korsgaard <peter@korsgaard.com>
> ---
>  drivers/mtd/nand/gpmi-nand/bch-regs.h  |    2 ++
>  drivers/mtd/nand/gpmi-nand/gpmi-lib.c  |   17 +++++++++++++
>  drivers/mtd/nand/gpmi-nand/gpmi-nand.c |   43 +++++++++++++++++++++++++++++---
>  drivers/mtd/nand/gpmi-nand/gpmi-nand.h |    1 +
>  4 files changed, 60 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/mtd/nand/gpmi-nand/bch-regs.h b/drivers/mtd/nand/gpmi-nand/bch-regs.h
> index 588f537..a30502f 100644
> --- a/drivers/mtd/nand/gpmi-nand/bch-regs.h
> +++ b/drivers/mtd/nand/gpmi-nand/bch-regs.h
> @@ -30,7 +30,9 @@
>  #define BM_BCH_CTRL_COMPLETE_IRQ		(1 << 0)
>  
>  #define HW_BCH_STATUS0				0x00000010
> +#define BM_BCH_STATUS0_ALLONES_MASK		(1 << 4)
>  #define HW_BCH_MODE				0x00000020
> +#define BM_BCH_MODE_ERASE_THRESHOLD_MASK	0xff
>  #define HW_BCH_ENCODEPTR			0x00000030
>  #define HW_BCH_DATAPTR				0x00000040
>  #define HW_BCH_METAPTR				0x00000050
> diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
> index aaced29..4551a38 100644
> --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
> +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
> @@ -286,6 +286,13 @@ int bch_set_geometry(struct gpmi_nand_data *this)
>  			| BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this),
>  			r->bch_regs + HW_BCH_FLASH0LAYOUT1);
>  
> +	/*
> +	 * Set the tolerance for bitflips when reading erased blocks
> +	 * equal to the ecc_strength.
> +	 */
Please also add the following comment :
   "We even detect the bitflips for SLC nand."

> +	writel(bch_geo->ecc_strength & BM_BCH_MODE_ERASE_THRESHOLD_MASK,
> +		r->bch_regs + HW_BCH_MODE);
> +

After discuss with the IC owner, we could set the bch_geo->gf_len here.

The bch_geo->gf_len is 13 or 14 now.

For the sake of safe, I suggest to set (bch_geo->gf_len / 2) for the ERASE_THRESHOLD.

I think this value is enough for us.


>  	/* Set *all* chip selects to use layout 0. */
>  	writel(0, r->bch_regs + HW_BCH_LAYOUTSELECT);
>  
> @@ -1094,6 +1101,16 @@ int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip)
>  	return reg & mask;
>  }
>  
> +/* Returns 1 if the last transaction consisted only out of ones. */
> +int gpmi_allones(struct gpmi_nand_data *this)
> +{
> +	struct resources *r = &this->resources;
> +	uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);
please add a empty line here.
> +	if (reg & BM_BCH_STATUS0_ALLONES_MASK)
> +		return 1;
> +	return 0;
We can simplify the code to:
	return reg & BM_BCH_STATUS0_ALLONES_MASK;

> +}
> +
>  static inline void set_dma_type(struct gpmi_nand_data *this,
>  					enum dma_ops_type type)
>  {
> diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
> index dabbc14..82eac9b 100644
> --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
> +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
> @@ -1012,6 +1012,30 @@ static void block_mark_swapping(struct gpmi_nand_data *this,
>  	p[1] = (p[1] & mask) | (from_oob >> (8 - bit));
>  }
>  
> +/*
> + * Count the number of 0 bits in a supposed to be
> + * erased region and correct them. Return the number
> + * of bitflips or zero when the region was correct.
> + */
> +static unsigned int erased_sector_bitflips(unsigned char *data,
> +					unsigned int chunk,
> +					struct bch_geometry *geo)
> +{
> +	unsigned int flip_bits = 0;
> +	int i;
> +	int base = geo->ecc_chunk_size * chunk;
> +
> +	/* Count bitflips */
> +	for (i = 0; i < geo->ecc_chunk_size; i++)
> +		flip_bits += hweight8(~data[base + i]);
> +
> +	/* Correct bitflips by 0xFF'ing this chunk. */
> +	if (flip_bits)
> +		memset(&data[base], 0xFF, geo->ecc_chunk_size);
> +
> +	return flip_bits;
> +}
> +
>  static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
>  				uint8_t *buf, int oob_required, int page)
>  {
> @@ -1023,6 +1047,7 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
>  	dma_addr_t    auxiliary_phys;
>  	unsigned int  i;
>  	unsigned char *status;
> +	unsigned int  flips;
>  	unsigned int  max_bitflips = 0;
>  	int           ret;
>  
> @@ -1057,15 +1082,27 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
>  	status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
>  
>  	for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) {
> -		if ((*status == STATUS_GOOD) || (*status == STATUS_ERASED))
> +		if (*status == STATUS_GOOD)
>  			continue;
>  
>  		if (*status == STATUS_UNCORRECTABLE) {
>  			mtd->ecc_stats.failed++;
>  			continue;
>  		}
> -		mtd->ecc_stats.corrected += *status;
> -		max_bitflips = max_t(unsigned int, max_bitflips, *status);
> +
> +		if (*status == STATUS_ERASED)
> +			if (gpmi_allones(this))
> +				continue;
> +			else
> +				/* Erased block with bitflips. */
> +				flips = erased_sector_bitflips(payload_virt, i,
> +							       nfc_geo);
> +		else
> +			/* BCH block corrected some errors for us. */
> +			flips = *status;

Please read the Documentation/CodeingStyle.
The code should like this:
  ........................................................
		if (*status == STATUS_ERASED) {
			if (gpmi_allones(this))
				continue;
			else
				/* Erased block with bitflips. */
				flips = erased_sector_bitflips(payload_virt, i,
							       nfc_geo);
		} else {
			/* BCH block corrected some errors for us. */
			flips = *status;
		}
  ........................................................
		
thanks
Huang Shijie
Elie De Brauwer Dec. 16, 2013, 9:43 a.m. UTC | #2
On Mon, Dec 16, 2013 at 5:30 AM, Huang Shijie <b32955@freescale.com> wrote:
> On Sun, Dec 15, 2013 at 07:44:21PM +0100, Elie De Brauwer wrote:
>> The BCH block typically used with a i.MX28 and GPMI block is only
>> able to correct bitflips on data actually streamed through the block.
>> When erasing a block the data does not stream through the BCH block
>> and therefore no ECC data is written to the NAND chip. This causes
>> gpmi_ecc_read_page to return failure as soon as a single non-1-bit is
>> found in an erased page. Typically causing problems at higher levels
>> (ubifs corrupted empty space warnings).
>>
>> This patch configures the BCH block to mark a block as 'erased' if
>> no more than ecc_strength bitflips are found. Next HW_BCH_STATUS0:ALLONES
>> is used to check if the data read were all ones. If this was not
>> the case a slow path is entered where bitflips are counted and
>> corrected in software, allowing the upper layers to take proper actions.
>>
>> Signed-off-by: Elie De Brauwer <eliedebrauwer@gmail.com>
>> Acked-by: Peter Korsgaard <peter@korsgaard.com>
>> ---
>>  drivers/mtd/nand/gpmi-nand/bch-regs.h  |    2 ++
>>  drivers/mtd/nand/gpmi-nand/gpmi-lib.c  |   17 +++++++++++++
>>  drivers/mtd/nand/gpmi-nand/gpmi-nand.c |   43 +++++++++++++++++++++++++++++---
>>  drivers/mtd/nand/gpmi-nand/gpmi-nand.h |    1 +
>>  4 files changed, 60 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/mtd/nand/gpmi-nand/bch-regs.h b/drivers/mtd/nand/gpmi-nand/bch-regs.h
>> index 588f537..a30502f 100644
>> --- a/drivers/mtd/nand/gpmi-nand/bch-regs.h
>> +++ b/drivers/mtd/nand/gpmi-nand/bch-regs.h
>> @@ -30,7 +30,9 @@
>>  #define BM_BCH_CTRL_COMPLETE_IRQ             (1 << 0)
>>
>>  #define HW_BCH_STATUS0                               0x00000010
>> +#define BM_BCH_STATUS0_ALLONES_MASK          (1 << 4)
>>  #define HW_BCH_MODE                          0x00000020
>> +#define BM_BCH_MODE_ERASE_THRESHOLD_MASK     0xff
>>  #define HW_BCH_ENCODEPTR                     0x00000030
>>  #define HW_BCH_DATAPTR                               0x00000040
>>  #define HW_BCH_METAPTR                               0x00000050
>> diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
>> index aaced29..4551a38 100644
>> --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
>> +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
>> @@ -286,6 +286,13 @@ int bch_set_geometry(struct gpmi_nand_data *this)
>>                       | BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this),
>>                       r->bch_regs + HW_BCH_FLASH0LAYOUT1);
>>
>> +     /*
>> +      * Set the tolerance for bitflips when reading erased blocks
>> +      * equal to the ecc_strength.
>> +      */
> Please also add the following comment :
>    "We even detect the bitflips for SLC nand."
>
>> +     writel(bch_geo->ecc_strength & BM_BCH_MODE_ERASE_THRESHOLD_MASK,
>> +             r->bch_regs + HW_BCH_MODE);
>> +
>
> After discuss with the IC owner, we could set the bch_geo->gf_len here.
>
> The bch_geo->gf_len is 13 or 14 now.
>
> For the sake of safe, I suggest to set (bch_geo->gf_len / 2) for the ERASE_THRESHOLD.
>
> I think this value is enough for us.
>
>
>>       /* Set *all* chip selects to use layout 0. */
>>       writel(0, r->bch_regs + HW_BCH_LAYOUTSELECT);
>>
>> @@ -1094,6 +1101,16 @@ int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip)
>>       return reg & mask;
>>  }
>>
>> +/* Returns 1 if the last transaction consisted only out of ones. */
>> +int gpmi_allones(struct gpmi_nand_data *this)
>> +{
>> +     struct resources *r = &this->resources;
>> +     uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);
> please add a empty line here.
>> +     if (reg & BM_BCH_STATUS0_ALLONES_MASK)
>> +             return 1;
>> +     return 0;
> We can simplify the code to:
>         return reg & BM_BCH_STATUS0_ALLONES_MASK;
>

I was doing some stress testing on this piece of code today, but I'm
afraid the ALLONES solution apparently does not work. If I look at the
BCH_STATUS0 register while doing some heavy flash access, the
(entire) register remains zero at all times, while I would at least expect the
ALLONES_MASK to change when I'm reading erased sectors. Any
suggestions on this ? Since if we can't rely on the functioning of this bit
it would be better to stick to the first version of the patch (with the gf_len
threshold in place).

my 2 cents
E.
Huang Shijie Dec. 16, 2013, 1 p.m. UTC | #3
On Mon, Dec 16, 2013 at 10:43:34AM +0100, Elie De Brauwer wrote:
> On Mon, Dec 16, 2013 at 5:30 AM, Huang Shijie <b32955@freescale.com> wrote:
 >> +/* Returns 1 if the last transaction consisted only out of ones. */
> >> +int gpmi_allones(struct gpmi_nand_data *this)
> >> +{
> >> +     struct resources *r = &this->resources;
> >> +     uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);
> > please add a empty line here.
> >> +     if (reg & BM_BCH_STATUS0_ALLONES_MASK)
> >> +             return 1;
> >> +     return 0;
> > We can simplify the code to:
> >         return reg & BM_BCH_STATUS0_ALLONES_MASK;
> >
> 
> I was doing some stress testing on this piece of code today, but I'm
> afraid the ALLONES solution apparently does not work. If I look at the
> BCH_STATUS0 register while doing some heavy flash access, the
> (entire) register remains zero at all times, while I would at least expect the
how do you do the stress test, and how do you know the register is zero?

I can test it on my side too.

thanks
Huang Shijie
Elie De Brauwer Dec. 16, 2013, 1:14 p.m. UTC | #4
On Mon, Dec 16, 2013 at 2:00 PM, Huang Shijie <shijie8@gmail.com> wrote:
> On Mon, Dec 16, 2013 at 10:43:34AM +0100, Elie De Brauwer wrote:
>> On Mon, Dec 16, 2013 at 5:30 AM, Huang Shijie <b32955@freescale.com> wrote:
>  >> +/* Returns 1 if the last transaction consisted only out of ones. */
>> >> +int gpmi_allones(struct gpmi_nand_data *this)
>> >> +{
>> >> +     struct resources *r = &this->resources;
>> >> +     uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);
>> > please add a empty line here.
>> >> +     if (reg & BM_BCH_STATUS0_ALLONES_MASK)
>> >> +             return 1;
>> >> +     return 0;
>> > We can simplify the code to:
>> >         return reg & BM_BCH_STATUS0_ALLONES_MASK;
>> >
>>
>> I was doing some stress testing on this piece of code today, but I'm
>> afraid the ALLONES solution apparently does not work. If I look at the
>> BCH_STATUS0 register while doing some heavy flash access, the
>> (entire) register remains zero at all times, while I would at least expect the
> how do you do the stress test, and how do you know the register is zero?
>
> I can test it on my side too.

Well the stress test is simply dd if=/dev/mtdX of=/dev/null bs=1M (or nanddump).
And checking the state of the register.
To check whether or not the register is zero is simply (accumulative
to version 2
of my patch:

int gpmi_allones(struct gpmi_nand_data *this)
{
   struct resources *r = &this->resources;
   uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);

   if (reg != 0)
      printk("%x\n", reg);
   return reg & BM_BCH_STATUS0_ALLONES_MASK;
}

And in gpmi_ecc_read_page() I added a call to it in the beginning of the loop:

   for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) {
      gpmi_allones(this);
      if (*status == STATUS_GOOD)
         continue;

The printk() is never triggering, while I should at least expect it to
trigger when I'm
reading an erased block. (The boards I'm using for testing are 'cured'
from bitflips for now).
My tests are done on an i.mx283.
Huang Shijie Dec. 17, 2013, 2:23 a.m. UTC | #5
On Mon, Dec 16, 2013 at 02:14:03PM +0100, Elie De Brauwer wrote:
> 
> Well the stress test is simply dd if=/dev/mtdX of=/dev/null bs=1M (or nanddump).

Does the mtdX is a new erased partition ? or a used partition?


The 0 of the HW_BCH_STATUS0 means the page has _no_ error found.

I did not apply your patch, i just read out the register, My test patch is :

-------------------------------------------------------------------
@@ -1002,12 +1002,29 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	/* Loop over status bytes, accumulating ECC status. */
 	status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
+	{
+		struct resources *r = &this->resources;
+
+		u32 reg;
+
+		reg = readl(r->bch_regs +  0x10);
+		printk("[ %s ] stat : %x\n", __func__, reg);
+	}
+
 	for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) {
 		if ((*status == STATUS_GOOD) || (*status == STATUS_ERASED))
 			continue;
 
 		if (*status == STATUS_UNCORRECTABLE) {
 			mtd->ecc_stats.failed++;
+			{
+				struct resources *r = &this->resources;
+
+				u32 reg;
+
+				reg = readl(r->bch_regs +  0x10);
+				printk("[ %s ] fail stat : %x\n", __func__, reg);
+			}
 			continue;
-------------------------------------------------------------------

In my imx6q-sabreauto board, when i use ubiattach, the log show likes:

-------------------------------------------------------
[   52.615042] UBI: attaching mtd4 to ubi0
[   52.616747] [ gpmi_ecc_read_page ] stat : 0
[   52.618224] [ gpmi_ecc_read_page ] stat : ff10
[   52.619802] [ gpmi_ecc_read_page ] stat : 0
[   52.621192] [ gpmi_ecc_read_page ] stat : ff10
[   52.622434] [ gpmi_ecc_read_page ] stat : 0
[   52.623637] [ gpmi_ecc_read_page ] stat : ff10
[   52.625113] [ gpmi_ecc_read_page ] stat : 0
[   52.626336] [ gpmi_ecc_read_page ] stat : ff10
[   52.627542] [ gpmi_ecc_read_page ] stat : 0
[   52.628741] [ gpmi_ecc_read_page ] stat : ff10
[   52.629940] [ gpmi_ecc_read_page ] stat : 0
[   52.631136] [ gpmi_ecc_read_page ] stat : ff10
[   52.632333] [ gpmi_ecc_read_page ] stat : 0
[   52.633527] [ gpmi_ecc_read_page ] stat : ff10
[   52.634741] [ gpmi_ecc_read_page ] stat : 0
[   52.635953] [ gpmi_ecc_read_page ] stat : ff10
[   52.637155] [ gpmi_ecc_read_page ] stat : 0
[   52.638354] [ gpmi_ecc_read_page ] stat : ff10
[   52.639551] [ gpmi_ecc_read_page ] stat : 0

   ...........................................................// Skip many lines

[   57.610635] [ gpmi_ecc_read_page ] fail stat : ff04
[   57.610671] UBI warning: ubi_io_read: error -74 (ECC error) while reading 4096 bytes from PEB 1997:4096, read only 4096 bytes, retry
[   57.611850] [ gpmi_ecc_read_page ] stat : ff04
[   57.611864] [ gpmi_ecc_read_page ] fail stat : ff04
[   57.611899] UBI warning: ubi_io_read: error -74 (ECC error) while reading 4096 bytes from PEB 1997:4096, read only 4096 bytes, retry
[   57.613075] [ gpmi_ecc_read_page ] stat : ff04
[   57.613087] [ gpmi_ecc_read_page ] fail stat : ff04
[   57.613122] UBI warning: ubi_io_read: error -74 (ECC error) while reading 4096 bytes from PEB 1997:4096, read only 4096 bytes, retry
[   57.615096] [ gpmi_ecc_read_page ] stat : ff04
[   57.615110] [ gpmi_ecc_read_page ] fail stat : ff04
[   57.615152] UBI error: ubi_io_read: error -74 (ECC error) while reading 4096 bytes from PEB 1997:4096, read 4096 bytes
[   57.615176] CPU: 0 PID: 831 Comm: ubiattach Not tainted 3.10.17-16984-g01eb085 #1378
[   57.615240] [<80013aec>] (unwind_backtrace+0x0/0xf8) from [<8001156c>] (show_stack+0x10/0x14)
[   57.615281] [<8001156c>] (show_stack+0x10/0x14) from [<80376420>] (ubi_io_read+0x130/0x2fc)
[   57.615309] [<80376420>] (ubi_io_read+0x130/0x2fc) from [<80376a1c>] (ubi_io_read_vid_hdr+0x48/0x21c)
[   57.615338] [<80376a1c>] (ubi_io_read_vid_hdr+0x48/0x21c) from [<8037b434>] (ubi_attach+0x264/0x14ac)
[   57.615364] [<8037b434>] (ubi_attach+0x264/0x14ac) from [<80370800>] (ubi_attach_mtd_dev+0x51c/0xbd8)
[   57.615387] [<80370800>] (ubi_attach_mtd_dev+0x51c/0xbd8) from [<80371134>] (ctrl_cdev_ioctl+0xd8/0x18c)
[   57.615414] [<80371134>] (ctrl_cdev_ioctl+0xd8/0x18c) from [<800cd55c>] (do_vfs_ioctl+0x80/0x5e4)
[   57.615437] [<800cd55c>] (do_vfs_ioctl+0x80/0x5e4) from [<800cdafc>] (SyS_ioctl+0x3c/0x5c)
[   57.615461] [<800cdafc>] (SyS_ioctl+0x3c/0x5c) from [<8000e040>] (ret_fast_syscall+0x0/0x30)
[   57.616800] [ gpmi_ecc_read_page ] stat : ff10
-----------------------------------------------------------------


thanks
Huang Shijie
Huang Shijie Dec. 17, 2013, 2:29 a.m. UTC | #6
On Mon, Dec 16, 2013 at 02:14:03PM +0100, Elie De Brauwer wrote:
> 
> Well the stress test is simply dd if=/dev/mtdX of=/dev/null bs=1M (or nanddump).
For a new erased partition, my patch shows log like:
----------------------------------------------------------
[  200.635358] [ gpmi_ecc_read_page ] stat : ff10
[  200.636522] [ gpmi_ecc_read_page ] stat : ff10
[  200.637675] [ gpmi_ecc_read_page ] stat : ff10
[  200.638827] [ gpmi_ecc_read_page ] stat : ff10
[  200.639980] [ gpmi_ecc_read_page ] stat : ff10
[  200.641130] [ gpmi_ecc_read_page ] stat : ff10
[  200.642285] [ gpmi_ecc_read_page ] stat : ff10
[  200.734553] [ gpmi_ecc_read_page ] stat : ff10
[  200.735726] [ gpmi_ecc_read_page ] stat : ff10
[  200.736882] [ gpmi_ecc_read_page ] stat : ff10
[  200.738037] [ gpmi_ecc_read_page ] stat : ff10
      ................................................
----------------------------------------------------------

thanks
Huang Shijie
Huang Shijie Dec. 17, 2013, 2:37 a.m. UTC | #7
On Mon, Dec 16, 2013 at 02:14:03PM +0100, Elie De Brauwer wrote:
> 
> int gpmi_allones(struct gpmi_nand_data *this)
> {
>    struct resources *r = &this->resources;
>    uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);
I suddenly realise that you read a wrong register. :)

It should be r->bch_regs, not r->gpmi_regs.

thanks
Huang Shijie
Huang Shijie Dec. 17, 2013, 3:50 a.m. UTC | #8
On Sun, Dec 15, 2013 at 07:44:21PM +0100, Elie De Brauwer wrote:
> @@ -1057,15 +1082,27 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
>  	status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
>  
>  	for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) {
> -		if ((*status == STATUS_GOOD) || (*status == STATUS_ERASED))
> +		if (*status == STATUS_GOOD)
>  			continue;
>  
>  		if (*status == STATUS_UNCORRECTABLE) {
>  			mtd->ecc_stats.failed++;
>  			continue;
>  		}
> -		mtd->ecc_stats.corrected += *status;
> -		max_bitflips = max_t(unsigned int, max_bitflips, *status);
> +
> +		if (*status == STATUS_ERASED)
> +			if (gpmi_allones(this))
> +				continue;
I think it is better to use "break" here, not "continue".

Since if the gpmi_allones() return true, it means all the ECC chunks are
0xff.

thanks
Huang Shijie.
Elie De Brauwer Dec. 17, 2013, 6:35 a.m. UTC | #9
On Tue, Dec 17, 2013 at 3:37 AM, Huang Shijie <b32955@freescale.com> wrote:
> On Mon, Dec 16, 2013 at 02:14:03PM +0100, Elie De Brauwer wrote:
>>
>> int gpmi_allones(struct gpmi_nand_data *this)
>> {
>>    struct resources *r = &this->resources;
>>    uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);
> I suddenly realise that you read a wrong register. :)
>
> It should be r->bch_regs, not r->gpmi_regs.
>


Argh, good catch, I swear I was staring at the offsets yesterday but
didn't check the base addresses. I tested this on my board and now I
also get a functioning situation back at "full" speed. I'll come up
with a v3 shortly.

Thanks a lot
E.
Brian Norris Dec. 17, 2013, 7:10 a.m. UTC | #10
On Mon, Dec 16, 2013 at 12:30:41PM +0800, Huang Shijie wrote:
> On Sun, Dec 15, 2013 at 07:44:21PM +0100, Elie De Brauwer wrote:
> > --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
> > +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
> > @@ -286,6 +286,13 @@ int bch_set_geometry(struct gpmi_nand_data *this)
> >  			| BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this),
> >  			r->bch_regs + HW_BCH_FLASH0LAYOUT1);
> >  
> > +	/*
> > +	 * Set the tolerance for bitflips when reading erased blocks
> > +	 * equal to the ecc_strength.
> > +	 */
> Please also add the following comment :
>    "We even detect the bitflips for SLC nand."

I don't think we need a comment specifically about SLC here in the code.
Unless otherwise stated, I think any comments should apply to both SLC
and MLC, right? But it could be worth mentioning in the _commit message_
that this problem occurs on both MLC and SLC, just for history's sake.

> > @@ -1094,6 +1101,16 @@ int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip)
> >  	return reg & mask;
> >  }
> >  
> > +/* Returns 1 if the last transaction consisted only out of ones. */
> > +int gpmi_allones(struct gpmi_nand_data *this)

Sorry to nitpick, but 'allones' is a bit confusing. It took me a few
seconds to parse that, since it almost looks like it could be a real
English word. Perhaps gpmi_all_ones()?

Thanks,
Brian
diff mbox

Patch

diff --git a/drivers/mtd/nand/gpmi-nand/bch-regs.h b/drivers/mtd/nand/gpmi-nand/bch-regs.h
index 588f537..a30502f 100644
--- a/drivers/mtd/nand/gpmi-nand/bch-regs.h
+++ b/drivers/mtd/nand/gpmi-nand/bch-regs.h
@@ -30,7 +30,9 @@ 
 #define BM_BCH_CTRL_COMPLETE_IRQ		(1 << 0)
 
 #define HW_BCH_STATUS0				0x00000010
+#define BM_BCH_STATUS0_ALLONES_MASK		(1 << 4)
 #define HW_BCH_MODE				0x00000020
+#define BM_BCH_MODE_ERASE_THRESHOLD_MASK	0xff
 #define HW_BCH_ENCODEPTR			0x00000030
 #define HW_BCH_DATAPTR				0x00000040
 #define HW_BCH_METAPTR				0x00000050
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index aaced29..4551a38 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -286,6 +286,13 @@  int bch_set_geometry(struct gpmi_nand_data *this)
 			| BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this),
 			r->bch_regs + HW_BCH_FLASH0LAYOUT1);
 
+	/*
+	 * Set the tolerance for bitflips when reading erased blocks
+	 * equal to the ecc_strength.
+	 */
+	writel(bch_geo->ecc_strength & BM_BCH_MODE_ERASE_THRESHOLD_MASK,
+		r->bch_regs + HW_BCH_MODE);
+
 	/* Set *all* chip selects to use layout 0. */
 	writel(0, r->bch_regs + HW_BCH_LAYOUTSELECT);
 
@@ -1094,6 +1101,16 @@  int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip)
 	return reg & mask;
 }
 
+/* Returns 1 if the last transaction consisted only out of ones. */
+int gpmi_allones(struct gpmi_nand_data *this)
+{
+	struct resources *r = &this->resources;
+	uint32_t reg = readl(r->gpmi_regs + HW_BCH_STATUS0);
+	if (reg & BM_BCH_STATUS0_ALLONES_MASK)
+		return 1;
+	return 0;
+}
+
 static inline void set_dma_type(struct gpmi_nand_data *this,
 					enum dma_ops_type type)
 {
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index dabbc14..82eac9b 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1012,6 +1012,30 @@  static void block_mark_swapping(struct gpmi_nand_data *this,
 	p[1] = (p[1] & mask) | (from_oob >> (8 - bit));
 }
 
+/*
+ * Count the number of 0 bits in a supposed to be
+ * erased region and correct them. Return the number
+ * of bitflips or zero when the region was correct.
+ */
+static unsigned int erased_sector_bitflips(unsigned char *data,
+					unsigned int chunk,
+					struct bch_geometry *geo)
+{
+	unsigned int flip_bits = 0;
+	int i;
+	int base = geo->ecc_chunk_size * chunk;
+
+	/* Count bitflips */
+	for (i = 0; i < geo->ecc_chunk_size; i++)
+		flip_bits += hweight8(~data[base + i]);
+
+	/* Correct bitflips by 0xFF'ing this chunk. */
+	if (flip_bits)
+		memset(&data[base], 0xFF, geo->ecc_chunk_size);
+
+	return flip_bits;
+}
+
 static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 				uint8_t *buf, int oob_required, int page)
 {
@@ -1023,6 +1047,7 @@  static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	dma_addr_t    auxiliary_phys;
 	unsigned int  i;
 	unsigned char *status;
+	unsigned int  flips;
 	unsigned int  max_bitflips = 0;
 	int           ret;
 
@@ -1057,15 +1082,27 @@  static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
 	for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) {
-		if ((*status == STATUS_GOOD) || (*status == STATUS_ERASED))
+		if (*status == STATUS_GOOD)
 			continue;
 
 		if (*status == STATUS_UNCORRECTABLE) {
 			mtd->ecc_stats.failed++;
 			continue;
 		}
-		mtd->ecc_stats.corrected += *status;
-		max_bitflips = max_t(unsigned int, max_bitflips, *status);
+
+		if (*status == STATUS_ERASED)
+			if (gpmi_allones(this))
+				continue;
+			else
+				/* Erased block with bitflips. */
+				flips = erased_sector_bitflips(payload_virt, i,
+							       nfc_geo);
+		else
+			/* BCH block corrected some errors for us. */
+			flips = *status;
+
+		mtd->ecc_stats.corrected += flips;
+		max_bitflips = max_t(unsigned int, max_bitflips, flips);
 	}
 
 	if (oob_required) {
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index a7685e3..4ddd6af 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -268,6 +268,7 @@  extern void gpmi_clear_bch(struct gpmi_nand_data *);
 extern void gpmi_dump_info(struct gpmi_nand_data *);
 extern int bch_set_geometry(struct gpmi_nand_data *);
 extern int gpmi_is_ready(struct gpmi_nand_data *, unsigned chip);
+extern int gpmi_allones(struct gpmi_nand_data *);
 extern int gpmi_send_command(struct gpmi_nand_data *);
 extern void gpmi_begin(struct gpmi_nand_data *);
 extern void gpmi_end(struct gpmi_nand_data *);