Message ID | 1480183585-592-16-git-send-email-yamada.masahiro@socionext.com |
---|---|
State | Deferred |
Headers | show |
On Sun, 27 Nov 2016 03:06:01 +0900 Masahiro Yamada <yamada.masahiro@socionext.com> wrote: > This function is unreadable due to the deep nesting. Note this > function does a job only when INTR_STATUS__ECC_ERR is set. > So, if the flag is not set, let it bail-out. > > Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> > --- > > drivers/mtd/nand/denali.c | 119 +++++++++++++++++++++++----------------------- > 1 file changed, 59 insertions(+), 60 deletions(-) > > diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c > index a7dc692..b577560 100644 > --- a/drivers/mtd/nand/denali.c > +++ b/drivers/mtd/nand/denali.c > @@ -908,69 +908,68 @@ static bool handle_ecc(struct denali_nand_info *denali, u8 *buf, > { > bool check_erased_page = false; > unsigned int bitflips = 0; > + u32 err_address, err_correction_info, err_byte, err_sector, err_device, > + err_correction_value; Please use single line definitions for local variables: u32 err_address, err_correction_info, err_byte, err_sector; u32 err_device, err_correction_value; Also, maybe you should use shorter names to avoid 80 chars wrapping as much as possible. > > - if (irq_status & INTR_STATUS__ECC_ERR) { > - /* read the ECC errors. we'll ignore them for now */ > - u32 err_address, err_correction_info, err_byte, > - err_sector, err_device, err_correction_value; > - denali_set_intr_modes(denali, false); > - > - do { > - err_address = ioread32(denali->flash_reg + > - ECC_ERROR_ADDRESS); > - err_sector = ECC_SECTOR(err_address); > - err_byte = ECC_BYTE(err_address); > - > - err_correction_info = ioread32(denali->flash_reg + > - ERR_CORRECTION_INFO); > - err_correction_value = > + if (!(irq_status & INTR_STATUS__ECC_ERR)) > + goto out; > + > + /* read the ECC errors. we'll ignore them for now */ > + denali_set_intr_modes(denali, false); > + > + do { > + err_address = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS); > + err_sector = ECC_SECTOR(err_address); > + err_byte = ECC_BYTE(err_address); > + > + err_correction_info = ioread32(denali->flash_reg + > + ERR_CORRECTION_INFO); > + err_correction_value = > ECC_CORRECTION_VALUE(err_correction_info); > - err_device = ECC_ERR_DEVICE(err_correction_info); > - > - if (ECC_ERROR_CORRECTABLE(err_correction_info)) { > - /* > - * If err_byte is larger than ECC_SECTOR_SIZE, > - * means error happened in OOB, so we ignore > - * it. It's no need for us to correct it > - * err_device is represented the NAND error > - * bits are happened in if there are more > - * than one NAND connected. > - */ > - if (err_byte < ECC_SECTOR_SIZE) { > - struct mtd_info *mtd = > - nand_to_mtd(&denali->nand); > - int offset; > - > - offset = (err_sector * > - ECC_SECTOR_SIZE + > - err_byte) * > - denali->devnum + > - err_device; > - /* correct the ECC error */ > - buf[offset] ^= err_correction_value; > - mtd->ecc_stats.corrected++; > - bitflips++; > - } > - } else { > - /* > - * if the error is not correctable, need to > - * look at the page to see if it is an erased > - * page. if so, then it's not a real ECC error > - */ > - check_erased_page = true; > + err_device = ECC_ERR_DEVICE(err_correction_info); > + > + if (ECC_ERROR_CORRECTABLE(err_correction_info)) { > + /* > + * If err_byte is larger than ECC_SECTOR_SIZE, means error > + * happened in OOB, so we ignore it. It's no need for > + * us to correct it err_device is represented the NAND > + * error bits are happened in if there are more than > + * one NAND connected. > + */ > + if (err_byte < ECC_SECTOR_SIZE) { > + struct mtd_info *mtd = > + nand_to_mtd(&denali->nand); > + int offset; > + > + offset = (err_sector * ECC_SECTOR_SIZE + err_byte) * > + denali->devnum + err_device; > + /* correct the ECC error */ > + buf[offset] ^= err_correction_value; > + mtd->ecc_stats.corrected++; > + bitflips++; > } > - } while (!ECC_LAST_ERR(err_correction_info)); > - /* > - * Once handle all ecc errors, controller will triger > - * a ECC_TRANSACTION_DONE interrupt, so here just wait > - * for a while for this interrupt > - */ > - while (!(read_interrupt_status(denali) & > - INTR_STATUS__ECC_TRANSACTION_DONE)) > - cpu_relax(); > - clear_interrupts(denali); > - denali_set_intr_modes(denali, true); > - } > + } else { > + /* > + * if the error is not correctable, need to look at the > + * page to see if it is an erased page. if so, then > + * it's not a real ECC error > + */ > + check_erased_page = true; > + } > + } while (!ECC_LAST_ERR(err_correction_info)); > + > + /* > + * Once handle all ecc errors, controller will triger a > + * ECC_TRANSACTION_DONE interrupt, so here just wait for > + * a while for this interrupt > + */ > + while (!(read_interrupt_status(denali) & > + INTR_STATUS__ECC_TRANSACTION_DONE)) > + cpu_relax(); > + clear_interrupts(denali); > + denali_set_intr_modes(denali, true); > + > + out: > *max_bitflips = bitflips; > return check_erased_page; > }
On Sun, 27 Nov 2016 03:06:01 +0900 Masahiro Yamada <yamada.masahiro@socionext.com> wrote: > This function is unreadable due to the deep nesting. Note this > function does a job only when INTR_STATUS__ECC_ERR is set. > So, if the flag is not set, let it bail-out. > > Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> > --- > > drivers/mtd/nand/denali.c | 119 +++++++++++++++++++++++----------------------- > 1 file changed, 59 insertions(+), 60 deletions(-) > > diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c > index a7dc692..b577560 100644 > --- a/drivers/mtd/nand/denali.c > +++ b/drivers/mtd/nand/denali.c > @@ -908,69 +908,68 @@ static bool handle_ecc(struct denali_nand_info *denali, u8 *buf, > { > bool check_erased_page = false; > unsigned int bitflips = 0; > + u32 err_address, err_correction_info, err_byte, err_sector, err_device, > + err_correction_value; > > - if (irq_status & INTR_STATUS__ECC_ERR) { > - /* read the ECC errors. we'll ignore them for now */ > - u32 err_address, err_correction_info, err_byte, > - err_sector, err_device, err_correction_value; > - denali_set_intr_modes(denali, false); > - > - do { > - err_address = ioread32(denali->flash_reg + > - ECC_ERROR_ADDRESS); > - err_sector = ECC_SECTOR(err_address); > - err_byte = ECC_BYTE(err_address); > - > - err_correction_info = ioread32(denali->flash_reg + > - ERR_CORRECTION_INFO); > - err_correction_value = > + if (!(irq_status & INTR_STATUS__ECC_ERR)) > + goto out; > + > + /* read the ECC errors. we'll ignore them for now */ > + denali_set_intr_modes(denali, false); > + > + do { > + err_address = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS); > + err_sector = ECC_SECTOR(err_address); > + err_byte = ECC_BYTE(err_address); > + > + err_correction_info = ioread32(denali->flash_reg + > + ERR_CORRECTION_INFO); > + err_correction_value = > ECC_CORRECTION_VALUE(err_correction_info); > - err_device = ECC_ERR_DEVICE(err_correction_info); > - > - if (ECC_ERROR_CORRECTABLE(err_correction_info)) { > - /* > - * If err_byte is larger than ECC_SECTOR_SIZE, > - * means error happened in OOB, so we ignore > - * it. It's no need for us to correct it > - * err_device is represented the NAND error > - * bits are happened in if there are more > - * than one NAND connected. > - */ > - if (err_byte < ECC_SECTOR_SIZE) { > - struct mtd_info *mtd = > - nand_to_mtd(&denali->nand); > - int offset; > - > - offset = (err_sector * > - ECC_SECTOR_SIZE + > - err_byte) * > - denali->devnum + > - err_device; > - /* correct the ECC error */ > - buf[offset] ^= err_correction_value; > - mtd->ecc_stats.corrected++; > - bitflips++; > - } > - } else { > - /* > - * if the error is not correctable, need to > - * look at the page to see if it is an erased > - * page. if so, then it's not a real ECC error > - */ > - check_erased_page = true; > + err_device = ECC_ERR_DEVICE(err_correction_info); > + > + if (ECC_ERROR_CORRECTABLE(err_correction_info)) { > + /* > + * If err_byte is larger than ECC_SECTOR_SIZE, means error > + * happened in OOB, so we ignore it. It's no need for > + * us to correct it err_device is represented the NAND > + * error bits are happened in if there are more than > + * one NAND connected. > + */ > + if (err_byte < ECC_SECTOR_SIZE) { > + struct mtd_info *mtd = > + nand_to_mtd(&denali->nand); > + int offset; > + > + offset = (err_sector * ECC_SECTOR_SIZE + err_byte) * > + denali->devnum + err_device; > + /* correct the ECC error */ > + buf[offset] ^= err_correction_value; > + mtd->ecc_stats.corrected++; > + bitflips++; Hm, bitflips is what is set in max_bitflips, and apparently the implementation (which is not yours) is not doing what the core expects. You should first count bitflips per sector with something like that: bitflips[err_sector]++; And then once you've iterated over all errors do: for (i = 0; i < nsectors; i++) max_bitflips = max(bitflips[err_sector], max_bitflips); > } > - } while (!ECC_LAST_ERR(err_correction_info)); > - /* > - * Once handle all ecc errors, controller will triger > - * a ECC_TRANSACTION_DONE interrupt, so here just wait > - * for a while for this interrupt > - */ > - while (!(read_interrupt_status(denali) & > - INTR_STATUS__ECC_TRANSACTION_DONE)) > - cpu_relax(); > - clear_interrupts(denali); > - denali_set_intr_modes(denali, true); > - } > + } else { > + /* > + * if the error is not correctable, need to look at the > + * page to see if it is an erased page. if so, then > + * it's not a real ECC error > + */ > + check_erased_page = true; > + } > + } while (!ECC_LAST_ERR(err_correction_info)); > + > + /* > + * Once handle all ecc errors, controller will triger a > + * ECC_TRANSACTION_DONE interrupt, so here just wait for > + * a while for this interrupt > + */ > + while (!(read_interrupt_status(denali) & > + INTR_STATUS__ECC_TRANSACTION_DONE)) > + cpu_relax(); > + clear_interrupts(denali); > + denali_set_intr_modes(denali, true); > + > + out: > *max_bitflips = bitflips; > return check_erased_page; > }
Hi Boris, 2016-11-28 0:42 GMT+09:00 Boris Brezillon <boris.brezillon@free-electrons.com>: >> + if (err_byte < ECC_SECTOR_SIZE) { >> + struct mtd_info *mtd = >> + nand_to_mtd(&denali->nand); >> + int offset; >> + >> + offset = (err_sector * ECC_SECTOR_SIZE + err_byte) * >> + denali->devnum + err_device; >> + /* correct the ECC error */ >> + buf[offset] ^= err_correction_value; >> + mtd->ecc_stats.corrected++; >> + bitflips++; > > Hm, bitflips is what is set in max_bitflips, and apparently the > implementation (which is not yours) is not doing what the core expects. > > You should first count bitflips per sector with something like that: > > bitflips[err_sector]++; > > > And then once you've iterated over all errors do: > > for (i = 0; i < nsectors; i++) > max_bitflips = max(bitflips[err_sector], max_bitflips); I see. For soft ECC fixup, we can calculate bitflips for each ECC sector, so I can fix the max_bitflips as the core framework expects. For hard ECC fixup, the register only reports the number of corrected bit-flips in the whole page (sum from all ECC sectors). We cannot calculate max_bitflips, I think. BTW, I noticed another problem of the current code. buf[offset] ^= err_correction_value; mtd->ecc_stats.corrected++; bitflips++; This code is counting the number of corrected bytes, not the number of corrected bits. I think multiple bit-flips within one byte can happen. Perhaps, we should add hweight8(buf[offset] ^ err_correction_value) to ecc_stats.corrected and bitflips.
On Fri, 2 Dec 2016 13:26:27 +0900 Masahiro Yamada <yamada.masahiro@socionext.com> wrote: > Hi Boris, > > > 2016-11-28 0:42 GMT+09:00 Boris Brezillon <boris.brezillon@free-electrons.com>: > >> + if (err_byte < ECC_SECTOR_SIZE) { > >> + struct mtd_info *mtd = > >> + nand_to_mtd(&denali->nand); > >> + int offset; > >> + > >> + offset = (err_sector * ECC_SECTOR_SIZE + err_byte) * > >> + denali->devnum + err_device; > >> + /* correct the ECC error */ > >> + buf[offset] ^= err_correction_value; > >> + mtd->ecc_stats.corrected++; > >> + bitflips++; > > > > Hm, bitflips is what is set in max_bitflips, and apparently the > > implementation (which is not yours) is not doing what the core expects. > > > > You should first count bitflips per sector with something like that: > > > > bitflips[err_sector]++; > > > > > > And then once you've iterated over all errors do: > > > > for (i = 0; i < nsectors; i++) > > max_bitflips = max(bitflips[err_sector], max_bitflips); > > > I see. > > For soft ECC fixup, we can calculate bitflips > for each ECC sector, so I can fix the max_bitflips > as the core framework expects. > > For hard ECC fixup, the register only reports > the number of corrected bit-flips > in the whole page (sum from all ECC sectors). > We cannot calculate max_bitflips, I think. > That's unfortunate. This means you'll return -EUCLEAN more quickly (which will trigger UBI eraseblock move), since the NAND framework is basing its 'too many bitflips' detection logic on the max_bitflips per ECC chunk and the bitflips threshold (by default 3/4 of the ECC strength). That doesn't mean it won't work, you'll just wear your NAND more quickly :-(. ITOH, doing max_bitflips = nbitflips / nsteps is not good either, because the bitflips might be all concentrated in the same ECC chunk, and in this case you really want to return -EUCLEAN. > > > BTW, I noticed another problem of the current code. > > buf[offset] ^= err_correction_value; > mtd->ecc_stats.corrected++; > bitflips++; > > This code is counting the number of corrected bytes, > not the number of corrected bits. > > > I think multiple bit-flips within one byte can happen. Yes. > > > Perhaps, we should add > > hweight8(buf[offset] ^ err_correction_value) > > to ecc_stats.corrected and bitflips. > Looks good.
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index a7dc692..b577560 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -908,69 +908,68 @@ static bool handle_ecc(struct denali_nand_info *denali, u8 *buf, { bool check_erased_page = false; unsigned int bitflips = 0; + u32 err_address, err_correction_info, err_byte, err_sector, err_device, + err_correction_value; - if (irq_status & INTR_STATUS__ECC_ERR) { - /* read the ECC errors. we'll ignore them for now */ - u32 err_address, err_correction_info, err_byte, - err_sector, err_device, err_correction_value; - denali_set_intr_modes(denali, false); - - do { - err_address = ioread32(denali->flash_reg + - ECC_ERROR_ADDRESS); - err_sector = ECC_SECTOR(err_address); - err_byte = ECC_BYTE(err_address); - - err_correction_info = ioread32(denali->flash_reg + - ERR_CORRECTION_INFO); - err_correction_value = + if (!(irq_status & INTR_STATUS__ECC_ERR)) + goto out; + + /* read the ECC errors. we'll ignore them for now */ + denali_set_intr_modes(denali, false); + + do { + err_address = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS); + err_sector = ECC_SECTOR(err_address); + err_byte = ECC_BYTE(err_address); + + err_correction_info = ioread32(denali->flash_reg + + ERR_CORRECTION_INFO); + err_correction_value = ECC_CORRECTION_VALUE(err_correction_info); - err_device = ECC_ERR_DEVICE(err_correction_info); - - if (ECC_ERROR_CORRECTABLE(err_correction_info)) { - /* - * If err_byte is larger than ECC_SECTOR_SIZE, - * means error happened in OOB, so we ignore - * it. It's no need for us to correct it - * err_device is represented the NAND error - * bits are happened in if there are more - * than one NAND connected. - */ - if (err_byte < ECC_SECTOR_SIZE) { - struct mtd_info *mtd = - nand_to_mtd(&denali->nand); - int offset; - - offset = (err_sector * - ECC_SECTOR_SIZE + - err_byte) * - denali->devnum + - err_device; - /* correct the ECC error */ - buf[offset] ^= err_correction_value; - mtd->ecc_stats.corrected++; - bitflips++; - } - } else { - /* - * if the error is not correctable, need to - * look at the page to see if it is an erased - * page. if so, then it's not a real ECC error - */ - check_erased_page = true; + err_device = ECC_ERR_DEVICE(err_correction_info); + + if (ECC_ERROR_CORRECTABLE(err_correction_info)) { + /* + * If err_byte is larger than ECC_SECTOR_SIZE, means error + * happened in OOB, so we ignore it. It's no need for + * us to correct it err_device is represented the NAND + * error bits are happened in if there are more than + * one NAND connected. + */ + if (err_byte < ECC_SECTOR_SIZE) { + struct mtd_info *mtd = + nand_to_mtd(&denali->nand); + int offset; + + offset = (err_sector * ECC_SECTOR_SIZE + err_byte) * + denali->devnum + err_device; + /* correct the ECC error */ + buf[offset] ^= err_correction_value; + mtd->ecc_stats.corrected++; + bitflips++; } - } while (!ECC_LAST_ERR(err_correction_info)); - /* - * Once handle all ecc errors, controller will triger - * a ECC_TRANSACTION_DONE interrupt, so here just wait - * for a while for this interrupt - */ - while (!(read_interrupt_status(denali) & - INTR_STATUS__ECC_TRANSACTION_DONE)) - cpu_relax(); - clear_interrupts(denali); - denali_set_intr_modes(denali, true); - } + } else { + /* + * if the error is not correctable, need to look at the + * page to see if it is an erased page. if so, then + * it's not a real ECC error + */ + check_erased_page = true; + } + } while (!ECC_LAST_ERR(err_correction_info)); + + /* + * Once handle all ecc errors, controller will triger a + * ECC_TRANSACTION_DONE interrupt, so here just wait for + * a while for this interrupt + */ + while (!(read_interrupt_status(denali) & + INTR_STATUS__ECC_TRANSACTION_DONE)) + cpu_relax(); + clear_interrupts(denali); + denali_set_intr_modes(denali, true); + + out: *max_bitflips = bitflips; return check_erased_page; }
This function is unreadable due to the deep nesting. Note this function does a job only when INTR_STATUS__ECC_ERR is set. So, if the flag is not set, let it bail-out. Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> --- drivers/mtd/nand/denali.c | 119 +++++++++++++++++++++++----------------------- 1 file changed, 59 insertions(+), 60 deletions(-)