diff mbox

[v7,3/6] mtd: nand: omap: ecc.correct: omap_elm_correct_data: fix erased-page detection for BCHx_HW ECC schemes

Message ID 1389942797-4632-4-git-send-email-pekon@ti.com
State Superseded
Headers show

Commit Message

pekon gupta Jan. 17, 2014, 7:13 a.m. UTC
As erased-pages do not have ECC stored in their OOB area, so they need to be
seperated out from programmed-pages, before doing BCH ECC correction.

In current implementation of omap_elm_correct_data() which does ECC correction
for BCHx ECC schemes, this erased-pages are detected based on specific marker
byte (reserved as 0x00) in ecc-layout.
However, this approach has some limitation like;
 1) All ecc-scheme layouts do not have such Reserved byte marker to
    differentiate between erased-page v/s programmed-page. Thus this is a
    customized solution.
 2) Reserved marker byte can itself be subjected to bit-flips causing
    erased-page to be misunderstood as programmed-page.

This patch removes dependency on any marker byte in ecc-layout, to differentiate
between erased-page v/s programeed-page. Instead a page is considered erased if
 (a) all(OOB)  == 0xff, .i.e., number of zero-bits in read_ecc[] == 0
 (b) number of zero-bits in (Data + OOB) is less than ecc-strength

This patch also adds a generic function count_zero_bits(), to find number of
bits which are '0' in given buffer. This function is optimized for comparing
read-data with 0xff.

Signed-off-by: Pekon Gupta <pekon@ti.com>
---
 drivers/mtd/nand/omap2.c | 88 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 71 insertions(+), 17 deletions(-)

Comments

Brian Norris Feb. 11, 2014, 9:34 p.m. UTC | #1
Hi Pekon,

On Fri, Jan 17, 2014 at 12:43:14PM +0530, Pekon Gupta wrote:
> As erased-pages do not have ECC stored in their OOB area, so they need to be
> seperated out from programmed-pages, before doing BCH ECC correction.
> 
> In current implementation of omap_elm_correct_data() which does ECC correction
> for BCHx ECC schemes, this erased-pages are detected based on specific marker
> byte (reserved as 0x00) in ecc-layout.
> However, this approach has some limitation like;
>  1) All ecc-scheme layouts do not have such Reserved byte marker to
>     differentiate between erased-page v/s programmed-page. Thus this is a
>     customized solution.
>  2) Reserved marker byte can itself be subjected to bit-flips causing
>     erased-page to be misunderstood as programmed-page.
> 
> This patch removes dependency on any marker byte in ecc-layout, to differentiate
> between erased-page v/s programeed-page. Instead a page is considered erased if
>  (a) all(OOB)  == 0xff, .i.e., number of zero-bits in read_ecc[] == 0
>  (b) number of zero-bits in (Data + OOB) is less than ecc-strength
> 
> This patch also adds a generic function count_zero_bits(), to find number of
> bits which are '0' in given buffer. This function is optimized for comparing
> read-data with 0xff.
> 
> Signed-off-by: Pekon Gupta <pekon@ti.com>
> ---
>  drivers/mtd/nand/omap2.c | 88 ++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 71 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
> index 2c73389..f833fbb 100644
> --- a/drivers/mtd/nand/omap2.c
> +++ b/drivers/mtd/nand/omap2.c
> @@ -1331,6 +1331,30 @@ static int erased_sector_bitflips(u_char *data, u_char *oob,
>  }
>  
>  /**
> + * count_zero_bits - count number of bit-flips in given buffer
> + * @buf:	pointer to buffer
> + * @length:	buffer length
> + * @max_bitflips: maximum number of correctable bit-flips (ecc.strength)
> + * $bitflip_count: pointer to store bit-flip count
> + *
> + * Counts number of bit-flips in given buffer, returns with error
> + * as soon as count exceeds max_bitflips limit.
> + */
> +static int count_zero_bits(u_char *buf, int length,
> +					 int max_bitflips, int *bitflip_count)
> +{	int i;
> +
> +	for (i = 0; i < length; i++) {
> +		if (unlikely(buf[i] != 0xff)) {
> +			*bitflip_count += hweight8(~buf[i]);
> +			if (unlikely(*bitflip_count > max_bitflips))
> +				return -EBADMSG;
> +		}
> +	}
> +	return 0;
> +}
> +
> +/**
>   * omap_elm_correct_data - corrects page data area in case error reported
>   * @mtd:	MTD device structure
>   * @data:	page data
> @@ -1359,14 +1383,21 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
>  {
>  	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
>  			mtd);
> -	int eccbytes	= info->nand.ecc.bytes;
> -	int eccsteps = info->nand.ecc.steps;
> +	struct nand_ecc_ctrl *ecc = &info->nand.ecc;
> +	int eccstrength		= ecc->strength;
> +	int eccsize		= ecc->size;
> +	int eccbytes		= ecc->bytes;
> +	int eccsteps		= ecc->steps;

When I recommended adding the 'ecc' variable (as you did), I was
suggesting you drop the next 4 variables. You don't need them when you
can (with just as few characters) refer to ecc->field directly and
easily. Stick with one or ther other -- the 4 local variables or the 1
ecc pointer -- but you don't need both.

>  	int i , j, stat = 0;
>  	int eccflag, actual_eccbytes;
>  	struct elm_errorvec err_vec[ERROR_VECTOR_MAX];
>  	u_char *ecc_vec = calc_ecc;
>  	u_char *spare_ecc = read_ecc;
>  	u_char *erased_ecc_vec;
> +	u_char *buf;
> +	int bitflip_count;
> +	int err;
> +	bool page_is_erased;
>  	enum bch_ecc type;
>  	bool is_error_reported = false;
>  
> @@ -1410,24 +1441,47 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
>  		}
>  
>  		if (eccflag == 1) {
> -			/*
> -			 * Set threshold to minimum of 4, half of ecc.strength/2
> -			 * to allow max bit flip in byte to 4
> -			 */
> -			unsigned int threshold = min_t(unsigned int, 4,
> -					info->nand.ecc.strength / 2);
> +			bitflip_count = 0;
> +			/* count zero-bits in OOB region */
> +			err = count_zero_bits(read_ecc, eccbytes,
> +						 eccstrength, &bitflip_count);
> +			if (err) {
> +				/*
> +				 * number of zero-bits in OOB > ecc-strength
> +				 * either un-correctable or programmed-page
> +				 */
> +				page_is_erased = false;
> +			} else if (bitflip_count == 0) {
> +				/* OOB == all(0xff) */
> +				page_is_erased = true;

This else-if is still incorrect. You cannot assume that just because the
OOB is all 0xff that the page is erased.

> +			} else {
> +				/*
> +				 * OOB has some bits as '0' but
> +				 * number of zero-bits in OOB < ecc-strength.
> +				 * It may be erased-page with bit-flips so,
> +				 * further checks are needed for confirmation
> +				 */
> +				/* count zero-bits in DATA region */
> +				buf = &data[eccsize * i];
> +				err = count_zero_bits(buf, eccsize,
> +						 eccstrength, &bitflip_count);
> +				if (err) {
> +					/*
> +					 * total number of zero-bits in OOB
> +					 * and DATA exceeds ecc-strength
> +					 */
> +					page_is_erased = false;
> +				} else {
> +					/* number of zero-bits < ecc-strength */
> +					page_is_erased = true;
> +				}
> +			}

This whole block (where you call count_zero_bits() twice) is a
convoluted and buggy way of just doing the following, AFAIU:

	page_is_erased = !count_zero_bits(read_ecc, ecc->bytes,
				ecc->strength, &bitflip_count) &&
			 !count_zero_bits(&data[ecc->size * i], ecc->size,
			 	ecc->strength, &bitflip_count);

You could modify that to your liking and add a comment, but it's much
more concise than your version, and from what I can tell, it's actually
correct.

>  
>  			/*
> -			 * Check data area is programmed by counting
> -			 * number of 0's at fixed offset in spare area.
> -			 * Checking count of 0's against threshold.
> -			 * In case programmed page expects at least threshold
> -			 * zeros in byte.
> -			 * If zeros are less than threshold for programmed page/
> -			 * zeros are more than threshold erased page, either
> -			 * case page reported as uncorrectable.
> +			 * erased-pages do not have ECC stored in OOB area,
> +			 * so they need to be handled separately.
>  			 */
> -			if (hweight8(~read_ecc[actual_eccbytes]) >= threshold) {
> +			if (!page_is_erased) {
>  				/*
>  				 * Update elm error vector as
>  				 * data area is programmed

Brian
pekon gupta Feb. 12, 2014, 10:31 a.m. UTC | #2
Hi Brian,

>From: Brian Norris [mailto:computersforpeace@gmail.com]
>>On Fri, Jan 17, 2014 at 12:43:14PM +0530, Pekon Gupta wrote:
>> As erased-pages do not have ECC stored in their OOB area, so they need to be
>> seperated out from programmed-pages, before doing BCH ECC correction.
>>
>> In current implementation of omap_elm_correct_data() which does ECC correction
>> for BCHx ECC schemes, this erased-pages are detected based on specific marker
>> byte (reserved as 0x00) in ecc-layout.
>> However, this approach has some limitation like;
>>  1) All ecc-scheme layouts do not have such Reserved byte marker to
>>     differentiate between erased-page v/s programmed-page. Thus this is a
>>     customized solution.
>>  2) Reserved marker byte can itself be subjected to bit-flips causing
>>     erased-page to be misunderstood as programmed-page.
>>
>> This patch removes dependency on any marker byte in ecc-layout, to differentiate
>> between erased-page v/s programeed-page. Instead a page is considered erased if
>>  (a) all(OOB)  == 0xff, .i.e., number of zero-bits in read_ecc[] == 0
>>  (b) number of zero-bits in (Data + OOB) is less than ecc-strength
>>
>> This patch also adds a generic function count_zero_bits(), to find number of
>> bits which are '0' in given buffer. This function is optimized for comparing
>> read-data with 0xff.
>>
>> Signed-off-by: Pekon Gupta <pekon@ti.com>
>> ---

[...]

>> +/**
>>   * omap_elm_correct_data - corrects page data area in case error reported
>>   * @mtd:	MTD device structure
>>   * @data:	page data
>> @@ -1359,14 +1383,21 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
>>  {
>>  	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
>>  			mtd);
>> -	int eccbytes	= info->nand.ecc.bytes;
>> -	int eccsteps = info->nand.ecc.steps;
>> +	struct nand_ecc_ctrl *ecc = &info->nand.ecc;
>> +	int eccstrength		= ecc->strength;
>> +	int eccsize		= ecc->size;
>> +	int eccbytes		= ecc->bytes;
>> +	int eccsteps		= ecc->steps;
>
>When I recommended adding the 'ecc' variable (as you did), I was
>suggesting you drop the next 4 variables. You don't need them when you
>can (with just as few characters) refer to ecc->field directly and
>easily. Stick with one or ther other -- the 4 local variables or the 1
>ecc pointer -- but you don't need both.
>
Oh sorry.. I misunderstood that. I'll fix this in next version.


>> @@ -1410,24 +1441,47 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
>>  		}
>>
>>  		if (eccflag == 1) {
>> -			/*
>> -			 * Set threshold to minimum of 4, half of ecc.strength/2
>> -			 * to allow max bit flip in byte to 4
>> -			 */
>> -			unsigned int threshold = min_t(unsigned int, 4,
>> -					info->nand.ecc.strength / 2);
>> +			bitflip_count = 0;
>> +			/* count zero-bits in OOB region */
>> +			err = count_zero_bits(read_ecc, eccbytes,
>> +						 eccstrength, &bitflip_count);
>> +			if (err) {
>> +				/*
>> +				 * number of zero-bits in OOB > ecc-strength
>> +				 * either un-correctable or programmed-page
>> +				 */
>> +				page_is_erased = false;
>> +			} else if (bitflip_count == 0) {
>> +				/* OOB == all(0xff) */
>> +				page_is_erased = true;
>
>This else-if is still incorrect. You cannot assume that just because the
>OOB is all 0xff that the page is erased.
>
Now this part is most important, where I would like to get more clarity
and feedback before I proceed.
----------------------------
if OOB of an page is == all(0xff)
(a) As per general NAND spec, chip->write_buf() _only_ fills the internal buffers of
 NAND device's with information to be written. Actual write to NAND array cells
 happen only after 'NAND_CMD_PAGEPROG' or 'NAND_CMD_CACHEDPROG' is issued.
 Thus both Main-area and OOB-area are programmed simultaneously inside NAND device.
 if there is any disruption (like power-cut) in on-going  NAND_CMD_PAGEPROG
 cycle, then the data should be assumed to be garbage, and it may have un-stable bits.

(b) if OOB==all(0xff) then there is _no_ ECC stored in OOB to check the read_data
  Hence driver cannot distinguish between genuine data and bit-flips.

Now based on (a) & (b), it's safe to assume that:
if (OOB == all 0xff)
	/* page has no-data | garbage-data*/
Agree ?
(This is where I call it page_is_erased==true, Though I could have used better
variable name as page_has_no_valid_data = true).
----------------------------

And also, driver should return 'total number zero-bits in both Main-area + OOB'
if (0 < 'total number of zero-bits in both OOB + DATA region' < mtd->bitflip_threshold)
	return -EUCLEAN; 
else
	return -EBADMSG;

** However there is a bug in current patch version which I just figured out.
I don't include the number of zero-bits of Main-area, if OOB==all(0xff).
 +			} else if (bitflip_count == 0) {
 +				/* OOB == all(0xff) */
 +				page_is_erased = true;
 +  /* missing */		bitflip_count += count_zero_bits(buf, eccsize,
 +					  		eccstrength, &bitflip_count);
------------------------

>> +			} else {
>> +				/*
>> +				 * OOB has some bits as '0' but
>> +				 * number of zero-bits in OOB < ecc-strength.
>> +				 * It may be erased-page with bit-flips so,
>> +				 * further checks are needed for confirmation
>> +				 */
>> +				/* count zero-bits in DATA region */
>> +				buf = &data[eccsize * i];
>> +				err = count_zero_bits(buf, eccsize,
>> +						 eccstrength, &bitflip_count);
>> +				if (err) {
>> +					/*
>> +					 * total number of zero-bits in OOB
>> +					 * and DATA exceeds ecc-strength
>> +					 */
>> +					page_is_erased = false;
>> +				} else {
>> +					/* number of zero-bits < ecc-strength */
>> +					page_is_erased = true;
>> +				}
>> +			}
>
>This whole block (where you call count_zero_bits() twice) is a
>convoluted and buggy way of just doing the following, AFAIU:
>
>	page_is_erased = !count_zero_bits(read_ecc, ecc->bytes,
>				ecc->strength, &bitflip_count) &&
>			 !count_zero_bits(&data[ecc->size * i], ecc->size,
>			 	ecc->strength, &bitflip_count);
>
>You could modify that to your liking and add a comment, but it's much
>more concise than your version, and from what I can tell, it's actually
>correct.
>
Firstly, In you above statement    's/&&/||'
because as per above statement, if count_zero_bits(oob) == 0, then
my patch assumes 'page_has_no_valid_data = true'.

page_is_erased = !count_zero_bits(read_ecc, ecc->bytes,
				ecc->strength, &bitflip_count) ||
			 !count_zero_bits(&data[ecc->size * i], ecc->size,
			 	ecc->strength, &bitflip_count);


Secondly, You are missing the that here NAND driver is relaxing the limit of
number-of-acceptable bit-flips in an erased-page, because some MLC and 
Toshiba SLC NANDs show bit-flips almost on every second already erased-page.
>> +				if (err) {
>> +					/*
>> +					 * total number of zero-bits in OOB
>> +					 * and DATA exceeds ecc-strength
>> +					 */
>> +					page_is_erased = false;
>> +				} else {
>> +					/* number of zero-bits < ecc-strength */
>> +					page_is_erased = true;
>> +				}

In above patch I can replace ecc->strength with mtd->bitflip_threshold
to make it more user controllable. As bitflip_threshold is configurable via
/sys/class/mtd/mtdX/bitflip_threshold
What is your opinion ?


This was also the basis of my question to Artem [1]

 [1] http://lists.infradead.org/pipermail/linux-mtd/2014-January/051556.html

With regards, pekon
Brian Norris March 7, 2014, 7:11 p.m. UTC | #3
Hi Pekon,

On Wed, Feb 12, 2014 at 10:31:02AM +0000, Pekon Gupta wrote:
> >From: Brian Norris [mailto:computersforpeace@gmail.com]
> >>On Fri, Jan 17, 2014 at 12:43:14PM +0530, Pekon Gupta wrote:
> >> This patch removes dependency on any marker byte in ecc-layout, to differentiate
> >> between erased-page v/s programeed-page. Instead a page is considered erased if
> >>  (a) all(OOB)  == 0xff, .i.e., number of zero-bits in read_ecc[] == 0
> >>  (b) number of zero-bits in (Data + OOB) is less than ecc-strength
[...]
> >> @@ -1410,24 +1441,47 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
> >>  		}
> >>
> >>  		if (eccflag == 1) {
> >> -			/*
> >> -			 * Set threshold to minimum of 4, half of ecc.strength/2
> >> -			 * to allow max bit flip in byte to 4
> >> -			 */
> >> -			unsigned int threshold = min_t(unsigned int, 4,
> >> -					info->nand.ecc.strength / 2);
> >> +			bitflip_count = 0;
> >> +			/* count zero-bits in OOB region */
> >> +			err = count_zero_bits(read_ecc, eccbytes,
> >> +						 eccstrength, &bitflip_count);
> >> +			if (err) {
> >> +				/*
> >> +				 * number of zero-bits in OOB > ecc-strength
> >> +				 * either un-correctable or programmed-page
> >> +				 */
> >> +				page_is_erased = false;
> >> +			} else if (bitflip_count == 0) {
> >> +				/* OOB == all(0xff) */
> >> +				page_is_erased = true;
> >
> >This else-if is still incorrect. You cannot assume that just because the
> >OOB is all 0xff that the page is erased.
> >
> Now this part is most important, where I would like to get more clarity
> and feedback before I proceed.
> ----------------------------
> if OOB of an page is == all(0xff)
> (a) As per general NAND spec, chip->write_buf() _only_ fills the internal buffers of
>  NAND device's with information to be written. Actual write to NAND array cells
>  happen only after 'NAND_CMD_PAGEPROG' or 'NAND_CMD_CACHEDPROG' is issued.

I'm not really sure why you bring up chip->write_buf(); not all
implementations actually use this. But anyway...

>  Thus both Main-area and OOB-area are programmed simultaneously inside NAND device.
>  if there is any disruption (like power-cut) in on-going  NAND_CMD_PAGEPROG
>  cycle, then the data should be assumed to be garbage, and it may have un-stable bits.

OK. But this is not at all what we're trying to check; we are checking
*only* whether this particular page reads mostly-0xff, due to regular
bitflips on an otherwise unprogrammed page. The "assumed garbage"
conclusion is really irrelevant here.

> (b) if OOB==all(0xff) then there is _no_ ECC stored in OOB to check the read_data

I disagree. Can you guarantee that there is *no* data pattern in which
the matching ECC syndrome bytes are all 0xff?

>   Hence driver cannot distinguish between genuine data and bit-flips.
> 
> Now based on (a) & (b), it's safe to assume that:
> if (OOB == all 0xff)
> 	/* page has no-data | garbage-data*/
> Agree ?

No. But more importantly, why do you want to make this conclusion?
Aren't we *only* trying to make a decision of "is this page
mostly-0xff"?

Or more directly: you are using the above conclusion ("page has garbage
data") to then erase the buffer entirely. This is totally, 100% bogus
because the data area might have junk (partially-programmed page?) but
the OOB could be all 0xff -- and you are now lying to the upper layers,
saying the page is cleanly 0xff!!!

I believe one of the two of us has a very dire misunderstanding here.
Please help me decide which of us this is :)

> (This is where I call it page_is_erased==true, Though I could have used better
> variable name as page_has_no_valid_data = true).
> ----------------------------
> 
> And also, driver should return 'total number zero-bits in both Main-area + OOB'
> if (0 < 'total number of zero-bits in both OOB + DATA region' < mtd->bitflip_threshold)
> 	return -EUCLEAN; 
> else
> 	return -EBADMSG;
> 
> ** However there is a bug in current patch version which I just figured out.
> I don't include the number of zero-bits of Main-area, if OOB==all(0xff).
>  +			} else if (bitflip_count == 0) {
>  +				/* OOB == all(0xff) */
>  +				page_is_erased = true;
>  +  /* missing */		bitflip_count += count_zero_bits(buf, eccsize,
>  +					  		eccstrength, &bitflip_count);
> ------------------------
> 
> >> +			} else {
> >> +				/*
> >> +				 * OOB has some bits as '0' but
> >> +				 * number of zero-bits in OOB < ecc-strength.
> >> +				 * It may be erased-page with bit-flips so,
> >> +				 * further checks are needed for confirmation
> >> +				 */
> >> +				/* count zero-bits in DATA region */
> >> +				buf = &data[eccsize * i];
> >> +				err = count_zero_bits(buf, eccsize,
> >> +						 eccstrength, &bitflip_count);
> >> +				if (err) {
> >> +					/*
> >> +					 * total number of zero-bits in OOB
> >> +					 * and DATA exceeds ecc-strength
> >> +					 */
> >> +					page_is_erased = false;
> >> +				} else {
> >> +					/* number of zero-bits < ecc-strength */
> >> +					page_is_erased = true;
> >> +				}
> >> +			}
> >
> >This whole block (where you call count_zero_bits() twice) is a
> >convoluted and buggy way of just doing the following, AFAIU:
> >
> >	page_is_erased = !count_zero_bits(read_ecc, ecc->bytes,
> >				ecc->strength, &bitflip_count) &&
> >			 !count_zero_bits(&data[ecc->size * i], ecc->size,
> >			 	ecc->strength, &bitflip_count);
> >
> >You could modify that to your liking and add a comment, but it's much
> >more concise than your version, and from what I can tell, it's actually
> >correct.
> >
> Firstly, In you above statement    's/&&/||'

No. I wrote it exactly as I meant it. We need to check both the data
area and the OOB, and if either cause us to exceed the ECC strength,
then the page is not erased.

> because as per above statement, if count_zero_bits(oob) == 0, then
> my patch assumes 'page_has_no_valid_data = true'.

And that assumption is 100% wrong, AIUI.

> page_is_erased = !count_zero_bits(read_ecc, ecc->bytes,
> 				ecc->strength, &bitflip_count) ||
> 			 !count_zero_bits(&data[ecc->size * i], ecc->size,
> 			 	ecc->strength, &bitflip_count);
> 
> 
> Secondly, You are missing the that here NAND driver is relaxing the limit of
> number-of-acceptable bit-flips in an erased-page, because some MLC and 
> Toshiba SLC NANDs show bit-flips almost on every second already erased-page.

No, I'm not missing this. The MTD/NAND layers already take care of the
"number-of-acceptable bit-flips" using the mtd->bitflip_threshold
parameter.

> >> +				if (err) {
> >> +					/*
> >> +					 * total number of zero-bits in OOB
> >> +					 * and DATA exceeds ecc-strength
> >> +					 */
> >> +					page_is_erased = false;
> >> +				} else {
> >> +					/* number of zero-bits < ecc-strength */
> >> +					page_is_erased = true;
> >> +				}
> 
> In above patch I can replace ecc->strength with mtd->bitflip_threshold
> to make it more user controllable. As bitflip_threshold is configurable via
> /sys/class/mtd/mtdX/bitflip_threshold
> What is your opinion ?

The hardware driver should not be comparing with 'bitflip_threshold'.
Comparing against ecc_strength is correct, as we are simulating the
current ECC correction strength.

Brian
diff mbox

Patch

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 2c73389..f833fbb 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1331,6 +1331,30 @@  static int erased_sector_bitflips(u_char *data, u_char *oob,
 }
 
 /**
+ * count_zero_bits - count number of bit-flips in given buffer
+ * @buf:	pointer to buffer
+ * @length:	buffer length
+ * @max_bitflips: maximum number of correctable bit-flips (ecc.strength)
+ * $bitflip_count: pointer to store bit-flip count
+ *
+ * Counts number of bit-flips in given buffer, returns with error
+ * as soon as count exceeds max_bitflips limit.
+ */
+static int count_zero_bits(u_char *buf, int length,
+					 int max_bitflips, int *bitflip_count)
+{	int i;
+
+	for (i = 0; i < length; i++) {
+		if (unlikely(buf[i] != 0xff)) {
+			*bitflip_count += hweight8(~buf[i]);
+			if (unlikely(*bitflip_count > max_bitflips))
+				return -EBADMSG;
+		}
+	}
+	return 0;
+}
+
+/**
  * omap_elm_correct_data - corrects page data area in case error reported
  * @mtd:	MTD device structure
  * @data:	page data
@@ -1359,14 +1383,21 @@  static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
 {
 	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
 			mtd);
-	int eccbytes	= info->nand.ecc.bytes;
-	int eccsteps = info->nand.ecc.steps;
+	struct nand_ecc_ctrl *ecc = &info->nand.ecc;
+	int eccstrength		= ecc->strength;
+	int eccsize		= ecc->size;
+	int eccbytes		= ecc->bytes;
+	int eccsteps		= ecc->steps;
 	int i , j, stat = 0;
 	int eccflag, actual_eccbytes;
 	struct elm_errorvec err_vec[ERROR_VECTOR_MAX];
 	u_char *ecc_vec = calc_ecc;
 	u_char *spare_ecc = read_ecc;
 	u_char *erased_ecc_vec;
+	u_char *buf;
+	int bitflip_count;
+	int err;
+	bool page_is_erased;
 	enum bch_ecc type;
 	bool is_error_reported = false;
 
@@ -1410,24 +1441,47 @@  static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
 		}
 
 		if (eccflag == 1) {
-			/*
-			 * Set threshold to minimum of 4, half of ecc.strength/2
-			 * to allow max bit flip in byte to 4
-			 */
-			unsigned int threshold = min_t(unsigned int, 4,
-					info->nand.ecc.strength / 2);
+			bitflip_count = 0;
+			/* count zero-bits in OOB region */
+			err = count_zero_bits(read_ecc, eccbytes,
+						 eccstrength, &bitflip_count);
+			if (err) {
+				/*
+				 * number of zero-bits in OOB > ecc-strength
+				 * either un-correctable or programmed-page
+				 */
+				page_is_erased = false;
+			} else if (bitflip_count == 0) {
+				/* OOB == all(0xff) */
+				page_is_erased = true;
+			} else {
+				/*
+				 * OOB has some bits as '0' but
+				 * number of zero-bits in OOB < ecc-strength.
+				 * It may be erased-page with bit-flips so,
+				 * further checks are needed for confirmation
+				 */
+				/* count zero-bits in DATA region */
+				buf = &data[eccsize * i];
+				err = count_zero_bits(buf, eccsize,
+						 eccstrength, &bitflip_count);
+				if (err) {
+					/*
+					 * total number of zero-bits in OOB
+					 * and DATA exceeds ecc-strength
+					 */
+					page_is_erased = false;
+				} else {
+					/* number of zero-bits < ecc-strength */
+					page_is_erased = true;
+				}
+			}
 
 			/*
-			 * Check data area is programmed by counting
-			 * number of 0's at fixed offset in spare area.
-			 * Checking count of 0's against threshold.
-			 * In case programmed page expects at least threshold
-			 * zeros in byte.
-			 * If zeros are less than threshold for programmed page/
-			 * zeros are more than threshold erased page, either
-			 * case page reported as uncorrectable.
+			 * erased-pages do not have ECC stored in OOB area,
+			 * so they need to be handled separately.
 			 */
-			if (hweight8(~read_ecc[actual_eccbytes]) >= threshold) {
+			if (!page_is_erased) {
 				/*
 				 * Update elm error vector as
 				 * data area is programmed