diff mbox

mtd: nand: Add support for Micron on-die ECC controller (rev2).

Message ID 1395875121-9320-1-git-send-email-davidm@egauge.net
State Rejected
Headers show

Commit Message

David Mosberger-Tang March 26, 2014, 11:05 p.m. UTC
This patch enables support for on-die ECC controllers as found on
certain Micron chips, for example.  On-die ECC can be useful for
platforms lacking hardware-support for multi-bit ECC.  The patch is
safe to apply because it never turns on on-die ECC on its own (that
job is left to the bootloader).  Instead, this code simply checks
whether the on-die ECC controller is enabled and, if so, uses it.

Signed-of-by: David Mosberger <davidm@egauge.net>
---
 drivers/mtd/nand/nand_base.c |  258 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/mtd/nand.h     |    8 ++
 2 files changed, 263 insertions(+), 3 deletions(-)

Comments

pekon gupta March 27, 2014, 6:56 a.m. UTC | #1
Hi David,

>From: linux-mtd [mailto:linux-mtd-bounces@lists.infradead.org] On Behalf Of David Mosberger
>Subject: [PATCH] mtd: nand: Add support for Micron on-die ECC controller (rev2).
>
Sorry, I looked at this patch little lately.
But good, if you can keep $subject consistent, like using
prefix "[PATCH v2]"  instead of adding suffix "(rev2)".

>This patch enables support for on-die ECC controllers as found on
>certain Micron chips, for example.  On-die ECC can be useful for
>platforms lacking hardware-support for multi-bit ECC.  The patch is
>safe to apply because it never turns on on-die ECC on its own (that
>job is left to the bootloader).  Instead, this code simply checks
>whether the on-die ECC controller is enabled and, if so, uses it.
>
Also, It would have been good if you can break this patch into two portions
because, I see this patch touching some sections of generic NAND driver
which are re-used by multiple other controller drivers, and it would be
necessary to take multiple tested-by from different owners before
accepting these changes. So, if you break your patch into
- Patch having Micron on-die controller specific changes / hooks
- Patch populating those hooks in generic NAND interface and other checks
Then it would be easy for review and fast in getting acceptance.

(Also, good to be reviewed by Brian once)


>Signed-of-by: David Mosberger <davidm@egauge.net>
>---
> drivers/mtd/nand/nand_base.c |  258 +++++++++++++++++++++++++++++++++++++++++-
> include/linux/mtd/nand.h     |    8 ++
> 2 files changed, 263 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
>index 5826da3..e642d1f 100644
>--- a/drivers/mtd/nand/nand_base.c
>+++ b/drivers/mtd/nand/nand_base.c
>@@ -78,6 +78,20 @@ static struct nand_ecclayout nand_oob_64 = {
> 		 .length = 38} }
> };
>
>+static struct nand_ecclayout nand_oob_64_on_die = {
>+	.eccbytes = 32,
>+	.eccpos = {
>+		    8,  9, 10, 11, 12, 13, 14, 15,
>+		   24, 25, 26, 27, 28, 29, 30, 31,
>+		   40, 41, 42, 43, 44, 45, 46, 47,
>+		   56, 57, 58, 59, 60, 61, 62, 63},
>+	.oobfree = {
>+		{.offset =  4,	 .length = 4},
>+		{.offset = 20,	 .length = 4},
>+		{.offset = 36,	 .length = 4},
>+		{.offset = 52,	 .length = 4}}
>+};
>+
> static struct nand_ecclayout nand_oob_128 = {
> 	.eccbytes = 48,
> 	.eccpos = {
>@@ -1250,6 +1264,196 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
> 	return max_bitflips;
> }
>
>+static int
>+set_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip, int on)
>+{
>+	u8 data[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
>+
>+	if (chip->ecc.mode != NAND_ECC_HW_ON_DIE)
>+		return 0;
>+
>+	if (on)
>+		data[0] = ONFI_FEATURE_OP_MODE_ENABLE_ON_DIE_ECC;
>+
>+	return chip->onfi_set_features(mtd, chip, ONFI_FEATURE_ADDR_OP_MODE,
>+				       data);
>+}
>+
>+/*
>+ * Return the number of bits that differ between buffers SRC1 and
>+ * SRC2, both of which are LEN bytes long.
>+ *
>+ * This code could be optimized for, but it only gets called on pages
>+ * with bitflips and compared to the cost of migrating an eraseblock,
>+ * the execution time here is trivial...
>+ */
>+static int
>+bitdiff(const void *s1, const void *s2, size_t len)
>+{
>+	const uint8_t *src1 = s1, *src2 = s2;
>+	int count = 0, i;
>+
>+	for (i = 0; i < len; ++i)
>+		count += hweight8(*src1++ ^ *src2++);
>+	return count;
>+}
>+
>+static int
>+check_for_bitflips(struct mtd_info *mtd, struct nand_chip *chip, int page)
>+{
>+	int flips = 0, max_bitflips = 0, i, j, read_size;
>+	uint8_t *chkbuf, *rawbuf, *chkoob, *rawoob;
>+	uint32_t *eccpos;
>+
>+	chkbuf = chip->buffers->chkbuf;
>+	rawbuf = chip->buffers->rawbuf;
>+	read_size = mtd->writesize + mtd->oobsize;
>+
>+	/* Read entire page w/OOB area with on-die ECC on: */
>+	chip->read_buf(mtd, chkbuf, read_size);

This should have been called directly from nand_read_page_on_die()

>+
>+	/* Re-read page with on-die ECC off: */
>+	set_on_die_ecc(mtd, chip, 0);
>+	{
Why these braces {} ? Did  you miss any error-checking here ?

>+		chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
>+		chip->read_buf(mtd, rawbuf, read_size);
>+	}

>+	set_on_die_ecc(mtd, chip, 1);
>+
>+	chkoob = chkbuf + mtd->writesize;
>+	rawoob = rawbuf + mtd->writesize;
>+	eccpos = chip->ecc.layout->eccpos;
>+	for (i = 0; i < chip->ecc.steps; ++i) {
>+		/* Count bit flips in the actual data area: */
>+		flips = bitdiff(chkbuf, rawbuf, chip->ecc.size);
>+		/* Count bit flips in the ECC bytes: */
>+		for (j = 0; j < chip->ecc.bytes; ++j) {

You should check bit-flips in complete OOB region (mtd->oobsize) not just ecc.bytes.

>+			flips += hweight8(chkoob[*eccpos] ^ rawoob[*eccpos]);
>+			++eccpos;
>+		}
>+		if (flips > 0)
>+			mtd->ecc_stats.corrected += flips;
>+		max_bitflips = max_t(int, max_bitflips, flips);
>+		chkbuf += chip->ecc.size;
>+		rawbuf += chip->ecc.size;
>+	}
>+
>+	/* Re-issue the READ command for the actual data read that follows.  */
>+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
>+

Something wrong here in the flow. (please see my comments later).

>+	return max_bitflips;
>+}
>+
>+static int check_read_status_on_die(struct mtd_info *mtd,
>+				    struct nand_chip *chip, int page)
>+{
>+	int max_bitflips = 0;
>+	uint8_t status;
>+
>+	/* Check ECC status of page just transferred into NAND's page buffer: */
>+	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
>+	status = chip->read_byte(mtd);
>+
>+	/* Switch back to data reading: */
>+	chip->cmd_ctrl(mtd, NAND_CMD_READ0,
>+		       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
>+	chip->cmd_ctrl(mtd, NAND_CMD_NONE,
>+		       NAND_NCE | NAND_CTRL_CHANGE);
>+
>+	if (status & NAND_STATUS_FAIL) {
>+		/* Page has invalid ECC. */
>+		mtd->ecc_stats.failed++;
>+	} else if (status & NAND_STATUS_REWRITE) {
>+		/*
>+		 * The Micron chips turn on the REWRITE status bit for
>+		 * ANY bit flips.  Some pages have stuck bits, so we
>+		 * don't want to migrate a block just because of
>+		 * single bit errors because otherwise, that block
>+		 * would effectively become unusable.  So, work out in
>+		 * software what the max number of flipped bits is for
>+		 * all subpages in a page:
>+		 */
>+		max_bitflips = check_for_bitflips(mtd, chip, page);
>+	}
>+	return max_bitflips;
>+}
>+
>+/**
>+ * nand_read_subpage_on_die - [REPLACEABLE] raw sub-page read function
>+ * @mtd: mtd info structure
>+ * @chip: nand chip info structure
>+ * @data_offs: offset of requested data within the page
>+ * @readlen: data length
>+ * @bufpoi: buffer to store read data
>+ */
>+static int nand_read_subpage_on_die(struct mtd_info *mtd,
>+				    struct nand_chip *chip,
>+				    uint32_t data_offs, uint32_t readlen,
>+				    uint8_t *bufpoi, int page)
>+{
>+	int start_step, end_step, num_steps, ret;
>+	int data_col_addr;
>+	int datafrag_len;
>+	uint32_t failed;
>+	uint8_t *p;
>+
>+	/* Column address within the page aligned to ECC size */
>+	start_step = data_offs / chip->ecc.size;
>+	end_step = (data_offs + readlen - 1) / chip->ecc.size;
>+	num_steps = end_step - start_step + 1;
>+
>+	/* Data size aligned to ECC ecc.size */
>+	datafrag_len = num_steps * chip->ecc.size;
>+	data_col_addr = start_step * chip->ecc.size;
>+	p = bufpoi + data_col_addr;
>+
>+	failed = mtd->ecc_stats.failed;
>+
>+	ret = check_read_status_on_die(mtd, chip, page);
>+	if (ret < 0 || mtd->ecc_stats.failed != failed) {
>+		memset(p, 0, datafrag_len);
>+		return ret;
>+	}
>+
>+	/* If we read not a page aligned data */
>+	if (data_col_addr != 0)
>+		chip->cmdfunc(mtd, NAND_CMD_RNDOUT, data_col_addr, -1);
>+
>+	chip->read_buf(mtd, p, datafrag_len);
>+
>+	return ret;
>+}
>+
>+/**
>+ * nand_read_page_on_die - [INTERN] read raw page data without ecc
>+ * @mtd: mtd info structure
>+ * @chip: nand chip info structure
>+ * @buf: buffer to store read data
>+ * @oob_required: caller requires OOB data read to chip->oob_poi
>+ * @page: page number to read
>+ */
>+static int nand_read_page_on_die(struct mtd_info *mtd, struct nand_chip *chip,
>+				 uint8_t *buf, int oob_required, int page)
>+{
>+	uint32_t failed;
>+	int ret;
>+
>+	failed = mtd->ecc_stats.failed;
>+

This is the entry point of chip->ecc.read_page(),  So, your first call to ecc.read_buf
should have happened here, not in check_for_bitflips(). And you should have just
passed the read_buffer to for checking of bit-flips.

>+	ret = check_read_status_on_die(mtd, chip, page);
Also this should have been part of nand_chip->ecc.correct() interface.



>+	if (ret < 0 || mtd->ecc_stats.failed != failed) {
>+		memset(buf, 0, mtd->writesize);

This is not required, because if there are ECC failure (uncorrectable bit-flips)
then, nand_do_read_ops() will automatically convert it into -EBADMSG.
And then let above layers MTD, UBI determine what to do with the corrupted data.
Un-correctable bit-flips may be in those portions of  a page which do not
contain any data. Hence, you should not reset the buffer even for ECC failures.

Example: UBIFS erase-header and volume-id-headers occupy on first few
bytes of the page. And so UBIFS uses in-band CRC checks to see if its headers
have any corruption. Now, even if the NAND page had some uncorrectable
bit-flips, but those bit-flips do not affect the data of UBIFS headers,
then UBIFS just reads those pages and scrubs the block.


>+		if (oob_required)
>+			memset(chip->oob_poi, 0, mtd->oobsize);
 -- same here --

>+		return ret;
>+	}
>+
>+	chip->read_buf(mtd, buf, mtd->writesize);

This is not correct. You first read the data, and then check for bit-flips.
You *don't* re-read the page again, because on re-read you may find
new bit-flips due to read-disturb errors, which were not accounted in
the earlier count. 

So, if you follow the below sequence..
(1) read buffer (with on-die ECC == ON)
(2) check for bit-flips (just pass the read_buffer)
(3) correct bit-flips (again just pass read_buffer)
Then probably you don't need this call, and you can re-use existing generic
NAND driver implementations.


>+	if (oob_required)
>+		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
>+	return ret;
>+}
>+
> /**
>  * nand_read_page_hwecc - [REPLACEABLE] hardware ECC based page read function
>  * @mtd: mtd info structure
>@@ -3783,22 +3987,46 @@ EXPORT_SYMBOL(nand_scan_ident);
> int nand_scan_tail(struct mtd_info *mtd)
> {
> 	int i;
>+	u8 features[ONFI_SUBFEATURE_PARAM_LEN];
> 	struct nand_chip *chip = mtd->priv;
> 	struct nand_ecc_ctrl *ecc = &chip->ecc;
> 	struct nand_buffers *nbuf;
>
>+	if (chip->onfi_get_features(mtd, chip, ONFI_FEATURE_ADDR_OP_MODE,
>+				    features) >= 0) {
>+		if (features[0] & ONFI_FEATURE_OP_MODE_ENABLE_ON_DIE_ECC) {
>+			/*
>+			 * If the chip has on-die ECC enabled, we kind
>+			 * of have to do the same...
>+			 */
>+			chip->ecc.mode = NAND_ECC_HW_ON_DIE;
>+			pr_info("Using on-die ECC\n");
>+		}
>+	}
>+
> 	/* New bad blocks should be marked in OOB, flash-based BBT, or both */
> 	BUG_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) &&
> 			!(chip->bbt_options & NAND_BBT_USE_FLASH));
>
> 	if (!(chip->options & NAND_OWN_BUFFERS)) {
>+		size_t on_die_bufsz = 0;
>+
>+		if (chip->ecc.mode == NAND_ECC_HW_ON_DIE)
>+			on_die_bufsz = 2*(mtd->writesize + mtd->oobsize);
>+
> 		nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize
>-				+ mtd->oobsize * 3, GFP_KERNEL);
>+				+ mtd->oobsize * 3 + on_die_bufsz, GFP_KERNEL);
> 		if (!nbuf)
> 			return -ENOMEM;
> 		nbuf->ecccalc = (uint8_t *)(nbuf + 1);
> 		nbuf->ecccode = nbuf->ecccalc + mtd->oobsize;
> 		nbuf->databuf = nbuf->ecccode + mtd->oobsize;
>+		if (chip->ecc.mode == NAND_ECC_HW_ON_DIE) {
>+			nbuf->chkbuf = (nbuf->databuf + mtd->writesize
>+					+ mtd->oobsize);
>+			nbuf->rawbuf = (nbuf->chkbuf + mtd->writesize
>+					+ mtd->oobsize);
>+		}
>
> 		chip->buffers = nbuf;
> 	} else {
>@@ -3956,6 +4184,25 @@ int nand_scan_tail(struct mtd_info *mtd)
> 		ecc->strength = ecc->bytes * 8 / fls(8 * ecc->size);
> 		break;
>
>+	case NAND_ECC_HW_ON_DIE:
>+		/* nand_bbt attempts to put Bbt marker at offset 8 in
>+		   oob, which is used for ECC by Micron
>+		   MT29F4G16ABADAWP, for example.  Fixed by not using
>+		   OOB for BBT marker.  */
>+		chip->bbt_options |= NAND_BBT_NO_OOB;
>+		chip->ecc.layout = &nand_oob_64_on_die;
>+		chip->ecc.read_page = nand_read_page_on_die;
>+		chip->ecc.read_subpage = nand_read_subpage_on_die;
>+		chip->ecc.write_page = nand_write_page_raw;
>+		chip->ecc.read_oob = nand_read_oob_std;
>+		chip->ecc.read_page_raw = nand_read_page_raw;
>+		chip->ecc.write_page_raw = nand_write_page_raw;
>+		chip->ecc.write_oob = nand_write_oob_std;
>+		chip->ecc.size = 512;
>+		chip->ecc.bytes = 8;
>+		chip->ecc.strength = 4;
>+		break;
>+
> 	case NAND_ECC_NONE:
> 		pr_warn("NAND_ECC_NONE selected by board driver. "
> 			   "This is not recommended!\n");
>@@ -4023,8 +4270,13 @@ int nand_scan_tail(struct mtd_info *mtd)
> 	/* Invalidate the pagebuffer reference */
> 	chip->pagebuf = -1;
>
>-	/* Large page NAND with SOFT_ECC should support subpage reads */
>-	if ((ecc->mode == NAND_ECC_SOFT) && (chip->page_shift > 9))
>+	/*
>+	 * Large page NAND with SOFT_ECC or on-die ECC should support
>+	 * subpage reads.
>+	 */
>+	if (((ecc->mode == NAND_ECC_SOFT)
>+	     || (chip->ecc.mode == NAND_ECC_HW_ON_DIE))
>+	    && (chip->page_shift > 9))
> 		chip->options |= NAND_SUBPAGE_READ;
>
> 	/* Fill in remaining MTD driver data */
>diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
>index 450d61e..4514ced 100644
>--- a/include/linux/mtd/nand.h
>+++ b/include/linux/mtd/nand.h
>@@ -101,6 +101,7 @@ extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
> /* Status bits */
> #define NAND_STATUS_FAIL	0x01
> #define NAND_STATUS_FAIL_N1	0x02
>+#define NAND_STATUS_REWRITE	0x08
> #define NAND_STATUS_TRUE_READY	0x20
> #define NAND_STATUS_READY	0x40
> #define NAND_STATUS_WP		0x80
>@@ -115,6 +116,7 @@ typedef enum {
> 	NAND_ECC_HW_SYNDROME,
> 	NAND_ECC_HW_OOB_FIRST,
> 	NAND_ECC_SOFT_BCH,
>+	NAND_ECC_HW_ON_DIE,
> } nand_ecc_modes_t;
>
> /*
>@@ -214,6 +216,10 @@ struct nand_chip;
> /* Vendor-specific feature address (Micron) */
> #define ONFI_FEATURE_ADDR_READ_RETRY	0x89
>
>+/* Vendor-specific array operation mode (Micron) */
>+#define ONFI_FEATURE_ADDR_OP_MODE	0x90
>+#define ONFI_FEATURE_OP_MODE_ENABLE_ON_DIE_ECC		0x08
>+
> /* ONFI subfeature parameters length */
> #define ONFI_SUBFEATURE_PARAM_LEN	4
>
>@@ -516,6 +522,8 @@ struct nand_buffers {
> 	uint8_t	*ecccalc;
> 	uint8_t	*ecccode;
> 	uint8_t *databuf;
>+	uint8_t *chkbuf;
>+	uint8_t *rawbuf;
> };
>
> /**
>--
>1.7.9.5
>
>
Also, can include a link to public copy of the Micron device datasheet
which has on-die ECC feature. It would be helpful to understand your patch


with regards, pekon
Gerhard Sittig March 27, 2014, 11:27 a.m. UTC | #2
thanks for adding me to Cc:

On Thu, 2014-03-27 at 06:56 +0000, Gupta, Pekon wrote:
> 
> >From: linux-mtd [mailto:linux-mtd-bounces@lists.infradead.org] On Behalf Of David Mosberger
> >Subject: [PATCH] mtd: nand: Add support for Micron on-die ECC controller (rev2).
> >
> Sorry, I looked at this patch little lately.
> But good, if you can keep $subject consistent, like using
> prefix "[PATCH v2]"  instead of adding suffix "(rev2)".

David, "PATCH v2" (can be applied with --subject-prefix in the
--format-patch invocation) has several benefits, reviewers
clearly notice it's another iteration, and the irrelevant
"(rev2)" won't clobber the commit message upon application

> >+static int
> >+check_for_bitflips(struct mtd_info *mtd, struct nand_chip *chip, int page)
> >+{
> >+	int flips = 0, max_bitflips = 0, i, j, read_size;
> >+	uint8_t *chkbuf, *rawbuf, *chkoob, *rawoob;
> >+	uint32_t *eccpos;
> >+
> >+	chkbuf = chip->buffers->chkbuf;
> >+	rawbuf = chip->buffers->rawbuf;
> >+	read_size = mtd->writesize + mtd->oobsize;
> >+
> >+	/* Read entire page w/OOB area with on-die ECC on: */
> >+	chip->read_buf(mtd, chkbuf, read_size);
> 
> This should have been called directly from nand_read_page_on_die()

given that there are questions and concerns raised, a comment in
the source about the program flow (in the "good" and "corrected"
and "beyond repair" cases) might be good

I'll respond to the "don't do this here" below

> >+static int check_read_status_on_die(struct mtd_info *mtd,
> >+				    struct nand_chip *chip, int page)
> >+{
> >+	int max_bitflips = 0;
> >+	uint8_t status;
> >+
> >+	/* Check ECC status of page just transferred into NAND's page buffer: */
> >+	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
> >+	status = chip->read_byte(mtd);
> >+
> >+	/* Switch back to data reading: */
> >+	chip->cmd_ctrl(mtd, NAND_CMD_READ0,
> >+		       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
> >+	chip->cmd_ctrl(mtd, NAND_CMD_NONE,
> >+		       NAND_NCE | NAND_CTRL_CHANGE);
> >+
> >+	if (status & NAND_STATUS_FAIL) {
> >+		/* Page has invalid ECC. */
> >+		mtd->ecc_stats.failed++;
> >+	} else if (status & NAND_STATUS_REWRITE) {
> >+		/*
> >+		 * The Micron chips turn on the REWRITE status bit for
> >+		 * ANY bit flips.  Some pages have stuck bits, so we
> >+		 * don't want to migrate a block just because of
> >+		 * single bit errors because otherwise, that block
> >+		 * would effectively become unusable.  So, work out in
> >+		 * software what the max number of flipped bits is for
> >+		 * all subpages in a page:
> >+		 */
> >+		max_bitflips = check_for_bitflips(mtd, chip, page);
> >+	}
> >+	return max_bitflips;
> >+}

a quick search even suggests that the REWRITE status bit need not
be seen upon _any_ fixable ECC error, but might depend on an
internal threshold (i.e. when this chip or a later model thinks
that a data refresh might be due); this is an assumption, cannot
tell whether it was verified

http://lists.infradead.org/pipermail/linux-mtd/2012-November/044855.html

it's a pity that the hardware won't tell how many bit errors were
detected, and that you feel you have to do the dance of switching
modes and reading raw data just to find this detail for yourself
:(  this is a new quality in all the patches I have seen floating
around


> >+/**
> >+ * nand_read_page_on_die - [INTERN] read raw page data without ecc
> >+ * @mtd: mtd info structure
> >+ * @chip: nand chip info structure
> >+ * @buf: buffer to store read data
> >+ * @oob_required: caller requires OOB data read to chip->oob_poi
> >+ * @page: page number to read
> >+ */
> >+static int nand_read_page_on_die(struct mtd_info *mtd, struct nand_chip *chip,
> >+				 uint8_t *buf, int oob_required, int page)
> >+{
> >+	uint32_t failed;
> >+	int ret;
> >+
> >+	failed = mtd->ecc_stats.failed;
> >+
> 
> This is the entry point of chip->ecc.read_page(),  So, your first call to ecc.read_buf
> should have happened here, not in check_for_bitflips(). And you should have just
> passed the read_buffer to for checking of bit-flips.
> 
> >+	ret = check_read_status_on_die(mtd, chip, page);
> Also this should have been part of nand_chip->ecc.correct() interface.

AFAIU the sequence of commands which the NAND chip requires
doesn't quite map to the sequence of what the MTD layer does
(emit the read command, read the data, optionally check ECC)

when "on die ECC" is enabled, the chip requires a sequence of
- read page command (READ0, addresses, READSTART)
- status check (mandatory! cmd 70, to check FAIL and REWRITE bits)
- re-enter data read mode (READ0, _without_ addresses and START)
- fetch data bytes

actually the READ0 plus READSTART does a transfer from the chip's
array into the chip's caches, before data is fetched out of the
chip into the SoC; the repeated READ0 does some kind of "rewind"
from status output to data output, it doesn't re-read the array
within the chip

> 
> >+	if (ret < 0 || mtd->ecc_stats.failed != failed) {
> >+		memset(buf, 0, mtd->writesize);
> 
> This is not required, because if there are ECC failure (uncorrectable bit-flips)
> then, nand_do_read_ops() will automatically convert it into -EBADMSG.
> And then let above layers MTD, UBI determine what to do with the corrupted data.
> Un-correctable bit-flips may be in those portions of  a page which do not
> contain any data. Hence, you should not reset the buffer even for ECC failures.
> 
> Example: UBIFS erase-header and volume-id-headers occupy on first few
> bytes of the page. And so UBIFS uses in-band CRC checks to see if its headers
> have any corruption. Now, even if the NAND page had some uncorrectable
> bit-flips, but those bit-flips do not affect the data of UBIFS headers,
> then UBIFS just reads those pages and scrubs the block.

that's a good hint, one might want to check the other ECC
callback routines (I thought I saw that pattern elsewhere, but
was working on a different kernel version)

> >+		if (oob_required)
> >+			memset(chip->oob_poi, 0, mtd->oobsize);
>  -- same here --
> 
> >+		return ret;
> >+	}
> >+
> >+	chip->read_buf(mtd, buf, mtd->writesize);
> 
> This is not correct. You first read the data, and then check for bit-flips.
> You *don't* re-read the page again, because on re-read you may find
> new bit-flips due to read-disturb errors, which were not accounted in
> the earlier count. 
> 
> So, if you follow the below sequence..
> (1) read buffer (with on-die ECC == ON)
> (2) check for bit-flips (just pass the read_buffer)
> (3) correct bit-flips (again just pass read_buffer)
> Then probably you don't need this call, and you can re-use existing generic
> NAND driver implementations.

unfortunately this is not possible according to the chip's
datasheet; even if you _evaluate_ the status only later, you have
to _fetch_ it before the data, and thus do the command re-issue
dance, and cannot use nand_command_lp() since it automatically
does READSTART for READ0

adding another pseudo command with high bits set and LSB 0x00
might help (similar to deplete), then the common command routine
can get re-used

> >@@ -3956,6 +4184,25 @@ int nand_scan_tail(struct mtd_info *mtd)
> > 		ecc->strength = ecc->bytes * 8 / fls(8 * ecc->size);
> > 		break;
> >
> >+	case NAND_ECC_HW_ON_DIE:
> >+		/* nand_bbt attempts to put Bbt marker at offset 8 in
> >+		   oob, which is used for ECC by Micron
> >+		   MT29F4G16ABADAWP, for example.  Fixed by not using
> >+		   OOB for BBT marker.  */
> >+		chip->bbt_options |= NAND_BBT_NO_OOB;
> >+		chip->ecc.layout = &nand_oob_64_on_die;
> >+		chip->ecc.read_page = nand_read_page_on_die;
> >+		chip->ecc.read_subpage = nand_read_subpage_on_die;
> >+		chip->ecc.write_page = nand_write_page_raw;
> >+		chip->ecc.read_oob = nand_read_oob_std;
> >+		chip->ecc.read_page_raw = nand_read_page_raw;
> >+		chip->ecc.write_page_raw = nand_write_page_raw;
> >+		chip->ecc.write_oob = nand_write_oob_std;
> >+		chip->ecc.size = 512;
> >+		chip->ecc.bytes = 8;
> >+		chip->ecc.strength = 4;
> >+		break;
> >+

I was wondering, have you seen an explicit discussion of how many
data bytes to read and to write in read and program requests?  I
missed this in the datasheet, neither found it in TN-29-56, but
just might have been blind ...


> >@@ -4023,8 +4270,13 @@ int nand_scan_tail(struct mtd_info *mtd)
> > 	/* Invalidate the pagebuffer reference */
> > 	chip->pagebuf = -1;
> >
> >-	/* Large page NAND with SOFT_ECC should support subpage reads */
> >-	if ((ecc->mode == NAND_ECC_SOFT) && (chip->page_shift > 9))
> >+	/*
> >+	 * Large page NAND with SOFT_ECC or on-die ECC should support
> >+	 * subpage reads.
> >+	 */
> >+	if (((ecc->mode == NAND_ECC_SOFT)
> >+	     || (chip->ecc.mode == NAND_ECC_HW_ON_DIE))
> >+	    && (chip->page_shift > 9))
> > 		chip->options |= NAND_SUBPAGE_READ;
> >
> > 	/* Fill in remaining MTD driver data */

this might apply to this specific model, but is it a general rule
that chips with on-die-ECC will have subpage support?  and that
we always want to use it?

> Also, can include a link to public copy of the Micron device datasheet
> which has on-die ECC feature. It would be helpful to understand your patch

I don't have a link at hand, but the datasheet's footer reads
"m60a_4gb_8gb_16gb_ecc_nand.pdf"

searching for implementations, there's the "sketch" or outline in
TN-29-56, but I haven't seen an official implementation from
Micron yet, and all the floating patches appear to "be inspired
by each other" if they are not identical (they all follow a
certain scheme, and even use identical identifiers) -- would be
nice to see some official reference, regardless of how much test
an "arbitrary" external implementation has received


virtually yours
Gerhard Sittig
David Mosberger-Tang March 27, 2014, 7:28 p.m. UTC | #3
On Thu, Mar 27, 2014 at 5:27 AM, Gerhard Sittig <gsi@denx.de> wrote:
> thanks for adding me to Cc:
>
> On Thu, 2014-03-27 at 06:56 +0000, Gupta, Pekon wrote:
>>
>> >From: linux-mtd [mailto:linux-mtd-bounces@lists.infradead.org] On Behalf Of David Mosberger
>> >Subject: [PATCH] mtd: nand: Add support for Micron on-die ECC controller (rev2).
>> >
>> Sorry, I looked at this patch little lately.
>> But good, if you can keep $subject consistent, like using
>> prefix "[PATCH v2]"  instead of adding suffix "(rev2)".
>
> David, "PATCH v2" (can be applied with --subject-prefix in the
> --format-patch invocation) has several benefits, reviewers
> clearly notice it's another iteration, and the irrelevant
> "(rev2)" won't clobber the commit message upon application

OK, thanks for the tip!

> a quick search even suggests that the REWRITE status bit need not
> be seen upon _any_ fixable ECC error, but might depend on an
> internal threshold (i.e. when this chip or a later model thinks
> that a data refresh might be due); this is an assumption, cannot
> tell whether it was verified

That's correct.  Micron doesn't define that clearly, but we found that for the
chip we used (MT29F4G16ABADAWP) the bit gets set for any ECC errors
(and that behavior was confirmed by others on the linux-mtd list previously).

> http://lists.infradead.org/pipermail/linux-mtd/2012-November/044855.html
>
> it's a pity that the hardware won't tell how many bit errors were
> detected, and that you feel you have to do the dance of switching
> modes and reading raw data just to find this detail for yourself
> :(  this is a new quality in all the patches I have seen floating
> around

I agree.  I'm certainly not looking for extra work or creating needless
complexity in the driver and at first was hoping that we could get away
without having to count bitflips, but in the end, it was the only solution
that worked correctly.  Without it, you'd end up rewriting blocks
needlessly, making the problem only worse.

> AFAIU the sequence of commands which the NAND chip requires
> doesn't quite map to the sequence of what the MTD layer does
> (emit the read command, read the data, optionally check ECC)
>
> when "on die ECC" is enabled, the chip requires a sequence of
> - read page command (READ0, addresses, READSTART)
> - status check (mandatory! cmd 70, to check FAIL and REWRITE bits)
> - re-enter data read mode (READ0, _without_ addresses and START)
> - fetch data bytes
>
> actually the READ0 plus READSTART does a transfer from the chip's
> array into the chip's caches, before data is fetched out of the
> chip into the SoC; the repeated READ0 does some kind of "rewind"
> from status output to data output, it doesn't re-read the array
> within the chip

That's correct.

>> >+    if (ret < 0 || mtd->ecc_stats.failed != failed) {
>> >+            memset(buf, 0, mtd->writesize);
>>
>> This is not required, because if there are ECC failure (uncorrectable bit-flips)
>> then, nand_do_read_ops() will automatically convert it into -EBADMSG.
>> And then let above layers MTD, UBI determine what to do with the corrupted data.
>> Un-correctable bit-flips may be in those portions of  a page which do not
>> contain any data. Hence, you should not reset the buffer even for ECC failures.
>>
>> Example: UBIFS erase-header and volume-id-headers occupy on first few
>> bytes of the page. And so UBIFS uses in-band CRC checks to see if its headers
>> have any corruption. Now, even if the NAND page had some uncorrectable
>> bit-flips, but those bit-flips do not affect the data of UBIFS headers,
>> then UBIFS just reads those pages and scrubs the block.

I see.  OK, I'll fix that, thanks!

> I was wondering, have you seen an explicit discussion of how many
> data bytes to read and to write in read and program requests?  I
> missed this in the datasheet, neither found it in TN-29-56, but
> just might have been blind ...

Sorry, I'm not sure I understand the question.  Let me know if this doesn't
answer your question: the way those chips work is that when you issue a
READ command, the entire page gets transferred and ECC applied.
After that, you can transfer bytes from the read buffer at will (even
randomly).

>> >+     * Large page NAND with SOFT_ECC or on-die ECC should support
>> >+     * subpage reads.
>> >+     */
>> >+    if (((ecc->mode == NAND_ECC_SOFT)
>> >+         || (chip->ecc.mode == NAND_ECC_HW_ON_DIE))
>> >+        && (chip->page_shift > 9))
>> >             chip->options |= NAND_SUBPAGE_READ;
>> >
>> >     /* Fill in remaining MTD driver data */
>
> this might apply to this specific model, but is it a general rule
> that chips with on-die-ECC will have subpage support?  and that
> we always want to use it?

I don't know, but obviously that can be tweaked later if it's not perfect.

>> Also, can include a link to public copy of the Micron device datasheet
>> which has on-die ECC feature. It would be helpful to understand your patch
>
> I don't have a link at hand, but the datasheet's footer reads
> "m60a_4gb_8gb_16gb_ecc_nand.pdf"
>
> searching for implementations, there's the "sketch" or outline in
> TN-29-56, but I haven't seen an official implementation from
> Micron yet, and all the floating patches appear to "be inspired
> by each other" if they are not identical (they all follow a
> certain scheme, and even use identical identifiers) -- would be
> nice to see some official reference, regardless of how much test
> an "arbitrary" external implementation has received

As far as I know, those datasheets all require an NDA.
For example, google for MT29F4G16ABADAWP and then try
to download the datasheet: you'll have to create an account and
agree to an NDA.  (This is part of the reason we don't use Micron
for our manufacturing anymore).

  --david
pekon gupta March 28, 2014, 8:37 a.m. UTC | #4
Hi,

>From: Gerhard Sittig [mailto:gsi@denx.de]
>>On Thu, 2014-03-27 at 06:56 +0000, Gupta, Pekon wrote:
>> >From: David Mosberger

[...]

>> >+/**
>> >+ * nand_read_page_on_die - [INTERN] read raw page data without ecc
>> >+ * @mtd: mtd info structure
>> >+ * @chip: nand chip info structure
>> >+ * @buf: buffer to store read data
>> >+ * @oob_required: caller requires OOB data read to chip->oob_poi
>> >+ * @page: page number to read
>> >+ */
>> >+static int nand_read_page_on_die(struct mtd_info *mtd, struct nand_chip *chip,
>> >+				 uint8_t *buf, int oob_required, int page)
>> >+{
>> >+	uint32_t failed;
>> >+	int ret;
>> >+
>> >+	failed = mtd->ecc_stats.failed;
>> >+
>>
>> This is the entry point of chip->ecc.read_page(),  So, your first call to ecc.read_buf
>> should have happened here, not in check_for_bitflips(). And you should have just
>> passed the read_buffer to for checking of bit-flips.
>>
>> >+	ret = check_read_status_on_die(mtd, chip, page);
>> Also this should have been part of nand_chip->ecc.correct() interface.
>
>AFAIU the sequence of commands which the NAND chip requires
>doesn't quite map to the sequence of what the MTD layer does
>(emit the read command, read the data, optionally check ECC)
>
>when "on die ECC" is enabled, the chip requires a sequence of
>- read page command (READ0, addresses, READSTART)
>- status check (mandatory! cmd 70, to check FAIL and REWRITE bits)
>- re-enter data read mode (READ0, _without_ addresses and START)
>- fetch data bytes
>
I checked Micron Datasheet for "MT29F4G16ABADAWP by name
"m60a_4gb_8gb_16gb_ecc_nand.pdf" and on its 
"Figure 39: READ PAGE (00h-30h) Operation with Internal ECC Enabled"
There is no mention of above third step.

As per the "figure 39" a READ_PAGE Operation with internal ECC enabled
requires following sequence..
- [00h] <address> <address> ... <address> [30h]  /* READ_PAGE command */
- [70h] <status> [00h]        /* READ_STATUS command */
And after that data comes our serial on the bus. So in that case you can


>actually the READ0 plus READSTART does a transfer from the chip's
>array into the chip's caches, before data is fetched out of the
>chip into the SoC; the repeated READ0 does some kind of "rewind"
>from status output to data output, it doesn't re-read the array
>within the chip
>
Agree.. All I'm saying is if you merge "check_read_status_on_die()" into
nand_read_page_on_die() then this would map to normal NAND driver flow.

nand_read_page_on_die( ... ) {
 (a)	chip->cmdfunc(mtd, NAND_CMD_READ0, column, page);  /* READ_PAGE command */

 (b)	/* Check ECC status of page just transferred into NAND's page buffer: */
	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
	status = chip->read_byte(mtd);

(c)	/* Read the data out from the NAND device */
	chip->read_buf(mtd, buf, mtd->writesize);
	if (oob_required)
		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);

(d)	/* check for ECC errors */
	if (status & NAND_STATUS_FAIL) {
		/* Page has invalid ECC, but still pass to above layers */
		return -EBADMSG;
	} else if (status & NAND_STATUS_REWRITE) {
		bitflip_count = chip->ecc.correct( mtd, buf, , ... )
		return bitflip_count;
	} else {
		return 0;
	}
}

Where; chip->ecc.correct = check_for_bitflips();

Above implementation, as you already fetch the data from NAND device
at the start, it saves you from one extra read done in check_for_bitflips()
+	/* Read entire page w/OOB area with on-die ECC on: */
+	chip->read_buf(mtd, chkbuf, read_size);

Also, you have to return the read_data, even in case of ECC failures,
which was not earlier done in check_read_status_on_die()

[...]

>> (1) read buffer (with on-die ECC == ON)
>> (2) check for bit-flips (just pass the read_buffer)
>> (3) correct bit-flips (again just pass read_buffer)
>> Then probably you don't need this call, and you can re-use existing generic
>> NAND driver implementations.
>
>unfortunately this is not possible according to the chip's
>datasheet; even if you _evaluate_ the status only later, you have
>to _fetch_ it before the data, and thus do the command re-issue
>dance, and cannot use nand_command_lp() since it automatically
>does READSTART for READ0
>
I have broken (1) into (a) + (b) + (c) above.
And (2) + (3) = (d), where, (3) is actually chip->ecc.correct() = check_for_bitflips().

Hope that clarifies the intend..


With regards, pekon
Gerhard Sittig March 28, 2014, 12:43 p.m. UTC | #5
On Fri, 2014-03-28 at 08:37 +0000, Gupta, Pekon wrote:
> 
> >From: Gerhard Sittig [mailto:gsi@denx.de]
> 
> [...]
> 
> >AFAIU the sequence of commands which the NAND chip requires
> >doesn't quite map to the sequence of what the MTD layer does
> >(emit the read command, read the data, optionally check ECC)
> >
> >when "on die ECC" is enabled, the chip requires a sequence of
> >- read page command (READ0, addresses, READSTART)
> >- status check (mandatory! cmd 70, to check FAIL and REWRITE bits)
> >- re-enter data read mode (READ0, _without_ addresses and START)
> >- fetch data bytes
> >
> I checked Micron Datasheet for "MT29F4G16ABADAWP by name
> "m60a_4gb_8gb_16gb_ecc_nand.pdf" and on its 
> "Figure 39: READ PAGE (00h-30h) Operation with Internal ECC Enabled"
> There is no mention of above third step.
> 
> As per the "figure 39" a READ_PAGE Operation with internal ECC enabled
> requires following sequence..
> - [00h] <address> <address> ... <address> [30h]  /* READ_PAGE command */
> - [70h] <status> [00h]        /* READ_STATUS command */
> And after that data comes our serial on the bus. So in that case you can

They cheat. :)  You'll only notice when you look closely, I
misread this stuff too at first.  The Micron datasheet uses
different terms than the MTD implementation in Linux does.

Their "READ_PAGE" command, referred to as 00-30, actually is in
Linux the "READ0 command (0x00) and READSTART command (0x30)"
sequence.  (I'd like to see them as sequences or aggregates of
existing commands, instead of even more commands that happen to
re-use other command's opcodes, which I feel is confusing.)

The above "70 status 00" actually is "STATUS (command 70 and a
status byte) plus READ0 (command 00)".  Not re-emitting the READ0
(this time without the addresses and READSTART) would make you
fetch more status bytes instead of the cached data bytes.

So you will find everything that is done in the implementation,
it's just that you have to "translate" between the perspectives.
That's OK, Linux is not the reference, but one has to remain
aware of this difference.


It's nice to see that apparently the "regular read with
subsequent ECC check" can be done, too.  With an option for
another "raw read and bit flip counting" should errors have
occured.


virtually yours
Gerhard Sittig
Gerhard Sittig March 28, 2014, 12:56 p.m. UTC | #6
On Thu, 2014-03-27 at 13:28 -0600, David Mosberger wrote:
> 
> On Thu, Mar 27, 2014 at 5:27 AM, Gerhard Sittig <gsi@denx.de> wrote:
> 
> > I was wondering, have you seen an explicit discussion of how many
> > data bytes to read and to write in read and program requests?  I
> > missed this in the datasheet, neither found it in TN-29-56, but
> > just might have been blind ...
> 
> Sorry, I'm not sure I understand the question.  Let me know if this doesn't
> answer your question: the way those chips work is that when you issue a
> READ command, the entire page gets transferred and ECC applied.
> After that, you can transfer bytes from the read buffer at will (even
> randomly).

It's the answer to one half of what I wondered. :)

Upon read, the chip does ECC detection and correction in
transparent ways.  So you probably just fetch the status and the
page's content (the writesize count), omitting the OOB area.
What would you get when you keep reading after the page size?
Probably the OOB area.  Is it required to read this as well, to
have it around in the write case?

Upon write, you certainly have to send the page content, i.e. a
stream of bytes with a count of the page's payload size.  But I
would not be certain whether you have to send the OOB area as
well.  Even if the chip will create and store ECC data by itself,
I could not find whether you have to "clock those bytes in" to
trigger the in-chip operation, too.  Am I overly paranoid?  Am I
talking nonsense? :)  There must have been a reason for the
introduction of the "oob_required" condition -- I just understood
it was other MLC chips, not the on-die-ECC that we are talking
about here.

But _if_ you have to send the OOB data upon write, most probably
you have to read this area as well, to pass back those data bytes
within OOB that are not ECC, or else you risk losing mgmt data.

Yes, I'm aware of the READ_OOB "pseudo command", which does a
read operation yet implies a specific offset within the cache,
i.e. it "forwards the read pointer".  This is used in a different
context, as is the raw operation.


virtually yours
Gerhard Sittig
David Mosberger-Tang March 28, 2014, 3:40 p.m. UTC | #7
Gerhard,

On Fri, Mar 28, 2014 at 6:56 AM, Gerhard Sittig <gsi@denx.de> wrote:

> Upon read, the chip does ECC detection and correction in
> transparent ways.  So you probably just fetch the status and the
> page's content (the writesize count), omitting the OOB area.
> What would you get when you keep reading after the page size?
> Probably the OOB area.  Is it required to read this as well, to
> have it around in the write case?

Yeah, I see where the confusion comes from: ECC is not calculated
when transferring data between the chip and the host.  Instead, ECC is
calculated
when moving a page between the flash array and the chip-internal buffer.

You can transfer data between the host and the chip-internal buffer
in any way you want as it doesn't involve the ECC engine at all.

Instead, it's the READ/PAGEPROG commands that take care of checking/updating
the ECC bytes.

In the datasheet, you can see that from the fact that the chip's RDY
line will stay low
longer with internal ECC enabled: for READ, its t_R_ECC (45-70us) instead of
t_R (25us), for PROGRAM PAGE it's t_PROG_ECC (220-600us) instead of
t_PROG (200-600us).

  --david
David Mosberger-Tang March 28, 2014, 3:52 p.m. UTC | #8
On Thu, Mar 27, 2014 at 12:56 AM, Gupta, Pekon <pekon@ti.com> wrote:

>>+      set_on_die_ecc(mtd, chip, 1);
>>+
>>+      chkoob = chkbuf + mtd->writesize;
>>+      rawoob = rawbuf + mtd->writesize;
>>+      eccpos = chip->ecc.layout->eccpos;
>>+      for (i = 0; i < chip->ecc.steps; ++i) {
>>+              /* Count bit flips in the actual data area: */
>>+              flips = bitdiff(chkbuf, rawbuf, chip->ecc.size);
>>+              /* Count bit flips in the ECC bytes: */
>>+              for (j = 0; j < chip->ecc.bytes; ++j) {
>
> You should check bit-flips in complete OOB region (mtd->oobsize) not just ecc.bytes.

I was under the impression that OOB data bytes cannot be assumed to be
ECC protected.
As it happens, when using Internal ECC on those Micron chips, *some*
of the OOB databytes
are ECC protected, but I didn't think it was necessary to count those
for bitflips, since OOB users
won't assume ECC protection anyhow.  Am I wrong about that?

  --david
pekon gupta March 28, 2014, 5:27 p.m. UTC | #9
Hi Gerhard,
[...]

>> >when "on die ECC" is enabled, the chip requires a sequence of
>> >- read page command (READ0, addresses, READSTART)
>> >- status check (mandatory! cmd 70, to check FAIL and REWRITE bits)
>> >- re-enter data read mode (READ0, _without_ addresses and START)
>> >- fetch data bytes
>> >
>> I checked Micron Datasheet for "MT29F4G16ABADAWP by name
>> "m60a_4gb_8gb_16gb_ecc_nand.pdf" and on its
>> "Figure 39: READ PAGE (00h-30h) Operation with Internal ECC Enabled"
>> There is no mention of above third step.
>>
>> As per the "figure 39" a READ_PAGE Operation with internal ECC enabled
>> requires following sequence..
>> - [00h] <address> <address> ... <address> [30h]  /* READ_PAGE command */
>> - [70h] <status> [00h]        /* READ_STATUS command */
>> And after that data comes our serial on the bus. So in that case you can
>
>They cheat. :)  You'll only notice when you look closely, I
>misread this stuff too at first.  The Micron datasheet uses
>different terms than the MTD implementation in Linux does.
>
>Their "READ_PAGE" command, referred to as 00-30, actually is in
>Linux the "READ0 command (0x00) and READSTART command (0x30)"
>sequence.  (I'd like to see them as sequences or aggregates of
>existing commands, instead of even more commands that happen to
>re-use other command's opcodes, which I feel is confusing.)
>
Yes, I think its correctly implemented in nand_command_lp()
(Kindly cross check if I misunderstood below)

For chip->cmd(mtd, NAND_CMD_READ0, column, page);

@@nand_command_lp(..)
(Step-1) : Sends READ0 == [00h]
         /* Command latch cycle */
	chip->cmd_ctrl(mtd, command, NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);

(Step-2): Sends <column-address> .. <page-address>
        if (column != -1) {
        ...
        }
        if (page != -1) {
        ...
        }

(Step-3): Send READ_START == [30h]
                switch (command) {
                ...
                case NAND_CMD_READ0:
                         chip->cmd_ctrl(mtd, NAND_CMD_READSTART,
			          NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
                         chip->cmd_ctrl(mtd, NAND_CMD_NONE,
			          NAND_NCE | NAND_CTRL_CHANGE);
                }

So, I think nand_command_lp() follows the sequence as per Micron device.
As it has been working for normal nand_reads also.


>The above "70 status 00" actually is "STATUS (command 70 and a
>status byte) plus READ0 (command 00)".  Not re-emitting the READ0
>(this time without the addresses and READSTART) would make you
>fetch more status bytes instead of the cached data bytes.
>
Yes, this is why I said ("this may require some tweaks").
So, actually you can modify nand_command_lp()
- to add new command = NAND_CMD_END_STATUS which just issues [00h],
- OR modify existing NAND_CMD_STATUS to do something similar.

But I think for most of the command sequences nand_command_lp()
can be re-used, if not then please improve it.

with regards, pekon
Brian Norris April 1, 2014, 8:36 a.m. UTC | #10
On Fri, Mar 28, 2014 at 09:52:37AM -0600, David Mosberger wrote:
> On Thu, Mar 27, 2014 at 12:56 AM, Gupta, Pekon <pekon@ti.com> wrote:
> 
> >>+      set_on_die_ecc(mtd, chip, 1);
> >>+
> >>+      chkoob = chkbuf + mtd->writesize;
> >>+      rawoob = rawbuf + mtd->writesize;
> >>+      eccpos = chip->ecc.layout->eccpos;
> >>+      for (i = 0; i < chip->ecc.steps; ++i) {
> >>+              /* Count bit flips in the actual data area: */
> >>+              flips = bitdiff(chkbuf, rawbuf, chip->ecc.size);
> >>+              /* Count bit flips in the ECC bytes: */
> >>+              for (j = 0; j < chip->ecc.bytes; ++j) {
> >
> > You should check bit-flips in complete OOB region (mtd->oobsize) not just ecc.bytes.
> 
> I was under the impression that OOB data bytes cannot be assumed to be
> ECC protected.
> As it happens, when using Internal ECC on those Micron chips, *some*
> of the OOB databytes
> are ECC protected, but I didn't think it was necessary to count those
> for bitflips, since OOB users
> won't assume ECC protection anyhow.  Am I wrong about that?

This is a touchy subject. Most of your comments are correct;
traditionally, ECC did not protect OOB, and some of the main users of it
(like JFFS2) assume that it isn't. This is patently false on some modern
systems, which do protect it.

But for this case, the max_bitflips count is used for determining when
the error rate is unacceptably high, not just to see whether your data
is currently corrupt. So if extra bitflips in this (otherwise
unimportant) spare area might eventually cause the ECC hardware to
report an error, then we need to count them.

Brian
Brian Norris April 1, 2014, 8:47 a.m. UTC | #11
On Fri, Mar 28, 2014 at 01:56:00PM +0100, Gerhard Sittig wrote:
> There must have been a reason for the
> introduction of the "oob_required" condition -- I just understood
> it was other MLC chips, not the on-die-ECC that we are talking
> about here.

'oob_required' is really only there for hardware which optimizes the
no-OOB case for program and read (I have an out-of-tree driver for
hardware whose DMA engine accelerates data-only read/program; ECC is
done on-the-fly in hardware).

Brian
David Mosberger-Tang April 1, 2014, 3:10 p.m. UTC | #12
[Forward as plain text.  Thanks, Gmail...]

---------- Forwarded message ----------
From: David Mosberger <davidm@egauge.net>
Date: Tue, Apr 1, 2014 at 9:09 AM
Subject: Re: [PATCH] mtd: nand: Add support for Micron on-die ECC
controller (rev2).
To: Brian Norris <computersforpeace@gmail.com>
Cc: "Gupta, Pekon" <pekon@ti.com>, "dedekind1@gmail.com"
<dedekind1@gmail.com>, "linux-mtd@lists.infradead.org"
<linux-mtd@lists.infradead.org>, Gerhard Sittig <gsi@denx.de>


On Tue, Apr 1, 2014 at 2:36 AM, Brian Norris
<computersforpeace@gmail.com> wrote:
>
>
> But for this case, the max_bitflips count is used for determining when
> the error rate is unacceptably high, not just to see whether your data
> is currently corrupt. So if extra bitflips in this (otherwise
> unimportant) spare area might eventually cause the ECC hardware to
> report an error, then we need to count them.
>

To do this properly would require describing which OOB bytes are
included in the ECC.
I don't think that's possible with the current nand_ecclayout, no?

Also, effectively what my patch does is to make those OOB bytes not
ECC protected, which
is perfectly fine semantics as far as Linux is concerned.  So yes,
what you say would be
an improvement, but it's not strictly needed.

  --david
diff mbox

Patch

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 5826da3..e642d1f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -78,6 +78,20 @@  static struct nand_ecclayout nand_oob_64 = {
 		 .length = 38} }
 };
 
+static struct nand_ecclayout nand_oob_64_on_die = {
+	.eccbytes = 32,
+	.eccpos = {
+		    8,  9, 10, 11, 12, 13, 14, 15,
+		   24, 25, 26, 27, 28, 29, 30, 31,
+		   40, 41, 42, 43, 44, 45, 46, 47,
+		   56, 57, 58, 59, 60, 61, 62, 63},
+	.oobfree = {
+		{.offset =  4,	 .length = 4},
+		{.offset = 20,	 .length = 4},
+		{.offset = 36,	 .length = 4},
+		{.offset = 52,	 .length = 4}}
+};
+
 static struct nand_ecclayout nand_oob_128 = {
 	.eccbytes = 48,
 	.eccpos = {
@@ -1250,6 +1264,196 @@  static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
 	return max_bitflips;
 }
 
+static int
+set_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip, int on)
+{
+	u8 data[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
+
+	if (chip->ecc.mode != NAND_ECC_HW_ON_DIE)
+		return 0;
+
+	if (on)
+		data[0] = ONFI_FEATURE_OP_MODE_ENABLE_ON_DIE_ECC;
+
+	return chip->onfi_set_features(mtd, chip, ONFI_FEATURE_ADDR_OP_MODE,
+				       data);
+}
+
+/*
+ * Return the number of bits that differ between buffers SRC1 and
+ * SRC2, both of which are LEN bytes long.
+ *
+ * This code could be optimized for, but it only gets called on pages
+ * with bitflips and compared to the cost of migrating an eraseblock,
+ * the execution time here is trivial...
+ */
+static int
+bitdiff(const void *s1, const void *s2, size_t len)
+{
+	const uint8_t *src1 = s1, *src2 = s2;
+	int count = 0, i;
+
+	for (i = 0; i < len; ++i)
+		count += hweight8(*src1++ ^ *src2++);
+	return count;
+}
+
+static int
+check_for_bitflips(struct mtd_info *mtd, struct nand_chip *chip, int page)
+{
+	int flips = 0, max_bitflips = 0, i, j, read_size;
+	uint8_t *chkbuf, *rawbuf, *chkoob, *rawoob;
+	uint32_t *eccpos;
+
+	chkbuf = chip->buffers->chkbuf;
+	rawbuf = chip->buffers->rawbuf;
+	read_size = mtd->writesize + mtd->oobsize;
+
+	/* Read entire page w/OOB area with on-die ECC on: */
+	chip->read_buf(mtd, chkbuf, read_size);
+
+	/* Re-read page with on-die ECC off: */
+	set_on_die_ecc(mtd, chip, 0);
+	{
+		chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+		chip->read_buf(mtd, rawbuf, read_size);
+	}
+	set_on_die_ecc(mtd, chip, 1);
+
+	chkoob = chkbuf + mtd->writesize;
+	rawoob = rawbuf + mtd->writesize;
+	eccpos = chip->ecc.layout->eccpos;
+	for (i = 0; i < chip->ecc.steps; ++i) {
+		/* Count bit flips in the actual data area: */
+		flips = bitdiff(chkbuf, rawbuf, chip->ecc.size);
+		/* Count bit flips in the ECC bytes: */
+		for (j = 0; j < chip->ecc.bytes; ++j) {
+			flips += hweight8(chkoob[*eccpos] ^ rawoob[*eccpos]);
+			++eccpos;
+		}
+		if (flips > 0)
+			mtd->ecc_stats.corrected += flips;
+		max_bitflips = max_t(int, max_bitflips, flips);
+		chkbuf += chip->ecc.size;
+		rawbuf += chip->ecc.size;
+	}
+
+	/* Re-issue the READ command for the actual data read that follows.  */
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+
+	return max_bitflips;
+}
+
+static int check_read_status_on_die(struct mtd_info *mtd,
+				    struct nand_chip *chip, int page)
+{
+	int max_bitflips = 0;
+	uint8_t status;
+
+	/* Check ECC status of page just transferred into NAND's page buffer: */
+	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+	status = chip->read_byte(mtd);
+
+	/* Switch back to data reading: */
+	chip->cmd_ctrl(mtd, NAND_CMD_READ0,
+		       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
+	chip->cmd_ctrl(mtd, NAND_CMD_NONE,
+		       NAND_NCE | NAND_CTRL_CHANGE);
+
+	if (status & NAND_STATUS_FAIL) {
+		/* Page has invalid ECC. */
+		mtd->ecc_stats.failed++;
+	} else if (status & NAND_STATUS_REWRITE) {
+		/*
+		 * The Micron chips turn on the REWRITE status bit for
+		 * ANY bit flips.  Some pages have stuck bits, so we
+		 * don't want to migrate a block just because of
+		 * single bit errors because otherwise, that block
+		 * would effectively become unusable.  So, work out in
+		 * software what the max number of flipped bits is for
+		 * all subpages in a page:
+		 */
+		max_bitflips = check_for_bitflips(mtd, chip, page);
+	}
+	return max_bitflips;
+}
+
+/**
+ * nand_read_subpage_on_die - [REPLACEABLE] raw sub-page read function
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @data_offs: offset of requested data within the page
+ * @readlen: data length
+ * @bufpoi: buffer to store read data
+ */
+static int nand_read_subpage_on_die(struct mtd_info *mtd,
+				    struct nand_chip *chip,
+				    uint32_t data_offs, uint32_t readlen,
+				    uint8_t *bufpoi, int page)
+{
+	int start_step, end_step, num_steps, ret;
+	int data_col_addr;
+	int datafrag_len;
+	uint32_t failed;
+	uint8_t *p;
+
+	/* Column address within the page aligned to ECC size */
+	start_step = data_offs / chip->ecc.size;
+	end_step = (data_offs + readlen - 1) / chip->ecc.size;
+	num_steps = end_step - start_step + 1;
+
+	/* Data size aligned to ECC ecc.size */
+	datafrag_len = num_steps * chip->ecc.size;
+	data_col_addr = start_step * chip->ecc.size;
+	p = bufpoi + data_col_addr;
+
+	failed = mtd->ecc_stats.failed;
+
+	ret = check_read_status_on_die(mtd, chip, page);
+	if (ret < 0 || mtd->ecc_stats.failed != failed) {
+		memset(p, 0, datafrag_len);
+		return ret;
+	}
+
+	/* If we read not a page aligned data */
+	if (data_col_addr != 0)
+		chip->cmdfunc(mtd, NAND_CMD_RNDOUT, data_col_addr, -1);
+
+	chip->read_buf(mtd, p, datafrag_len);
+
+	return ret;
+}
+
+/**
+ * nand_read_page_on_die - [INTERN] read raw page data without ecc
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
+ * @page: page number to read
+ */
+static int nand_read_page_on_die(struct mtd_info *mtd, struct nand_chip *chip,
+				 uint8_t *buf, int oob_required, int page)
+{
+	uint32_t failed;
+	int ret;
+
+	failed = mtd->ecc_stats.failed;
+
+	ret = check_read_status_on_die(mtd, chip, page);
+	if (ret < 0 || mtd->ecc_stats.failed != failed) {
+		memset(buf, 0, mtd->writesize);
+		if (oob_required)
+			memset(chip->oob_poi, 0, mtd->oobsize);
+		return ret;
+	}
+
+	chip->read_buf(mtd, buf, mtd->writesize);
+	if (oob_required)
+		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	return ret;
+}
+
 /**
  * nand_read_page_hwecc - [REPLACEABLE] hardware ECC based page read function
  * @mtd: mtd info structure
@@ -3783,22 +3987,46 @@  EXPORT_SYMBOL(nand_scan_ident);
 int nand_scan_tail(struct mtd_info *mtd)
 {
 	int i;
+	u8 features[ONFI_SUBFEATURE_PARAM_LEN];
 	struct nand_chip *chip = mtd->priv;
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	struct nand_buffers *nbuf;
 
+	if (chip->onfi_get_features(mtd, chip, ONFI_FEATURE_ADDR_OP_MODE,
+				    features) >= 0) {
+		if (features[0] & ONFI_FEATURE_OP_MODE_ENABLE_ON_DIE_ECC) {
+			/*
+			 * If the chip has on-die ECC enabled, we kind
+			 * of have to do the same...
+			 */
+			chip->ecc.mode = NAND_ECC_HW_ON_DIE;
+			pr_info("Using on-die ECC\n");
+		}
+	}
+
 	/* New bad blocks should be marked in OOB, flash-based BBT, or both */
 	BUG_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) &&
 			!(chip->bbt_options & NAND_BBT_USE_FLASH));
 
 	if (!(chip->options & NAND_OWN_BUFFERS)) {
+		size_t on_die_bufsz = 0;
+
+		if (chip->ecc.mode == NAND_ECC_HW_ON_DIE)
+			on_die_bufsz = 2*(mtd->writesize + mtd->oobsize);
+
 		nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize
-				+ mtd->oobsize * 3, GFP_KERNEL);
+				+ mtd->oobsize * 3 + on_die_bufsz, GFP_KERNEL);
 		if (!nbuf)
 			return -ENOMEM;
 		nbuf->ecccalc = (uint8_t *)(nbuf + 1);
 		nbuf->ecccode = nbuf->ecccalc + mtd->oobsize;
 		nbuf->databuf = nbuf->ecccode + mtd->oobsize;
+		if (chip->ecc.mode == NAND_ECC_HW_ON_DIE) {
+			nbuf->chkbuf = (nbuf->databuf + mtd->writesize
+					+ mtd->oobsize);
+			nbuf->rawbuf = (nbuf->chkbuf + mtd->writesize
+					+ mtd->oobsize);
+		}
 
 		chip->buffers = nbuf;
 	} else {
@@ -3956,6 +4184,25 @@  int nand_scan_tail(struct mtd_info *mtd)
 		ecc->strength = ecc->bytes * 8 / fls(8 * ecc->size);
 		break;
 
+	case NAND_ECC_HW_ON_DIE:
+		/* nand_bbt attempts to put Bbt marker at offset 8 in
+		   oob, which is used for ECC by Micron
+		   MT29F4G16ABADAWP, for example.  Fixed by not using
+		   OOB for BBT marker.  */
+		chip->bbt_options |= NAND_BBT_NO_OOB;
+		chip->ecc.layout = &nand_oob_64_on_die;
+		chip->ecc.read_page = nand_read_page_on_die;
+		chip->ecc.read_subpage = nand_read_subpage_on_die;
+		chip->ecc.write_page = nand_write_page_raw;
+		chip->ecc.read_oob = nand_read_oob_std;
+		chip->ecc.read_page_raw = nand_read_page_raw;
+		chip->ecc.write_page_raw = nand_write_page_raw;
+		chip->ecc.write_oob = nand_write_oob_std;
+		chip->ecc.size = 512;
+		chip->ecc.bytes = 8;
+		chip->ecc.strength = 4;
+		break;
+
 	case NAND_ECC_NONE:
 		pr_warn("NAND_ECC_NONE selected by board driver. "
 			   "This is not recommended!\n");
@@ -4023,8 +4270,13 @@  int nand_scan_tail(struct mtd_info *mtd)
 	/* Invalidate the pagebuffer reference */
 	chip->pagebuf = -1;
 
-	/* Large page NAND with SOFT_ECC should support subpage reads */
-	if ((ecc->mode == NAND_ECC_SOFT) && (chip->page_shift > 9))
+	/*
+	 * Large page NAND with SOFT_ECC or on-die ECC should support
+	 * subpage reads.
+	 */
+	if (((ecc->mode == NAND_ECC_SOFT)
+	     || (chip->ecc.mode == NAND_ECC_HW_ON_DIE))
+	    && (chip->page_shift > 9))
 		chip->options |= NAND_SUBPAGE_READ;
 
 	/* Fill in remaining MTD driver data */
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 450d61e..4514ced 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -101,6 +101,7 @@  extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 /* Status bits */
 #define NAND_STATUS_FAIL	0x01
 #define NAND_STATUS_FAIL_N1	0x02
+#define NAND_STATUS_REWRITE	0x08
 #define NAND_STATUS_TRUE_READY	0x20
 #define NAND_STATUS_READY	0x40
 #define NAND_STATUS_WP		0x80
@@ -115,6 +116,7 @@  typedef enum {
 	NAND_ECC_HW_SYNDROME,
 	NAND_ECC_HW_OOB_FIRST,
 	NAND_ECC_SOFT_BCH,
+	NAND_ECC_HW_ON_DIE,
 } nand_ecc_modes_t;
 
 /*
@@ -214,6 +216,10 @@  struct nand_chip;
 /* Vendor-specific feature address (Micron) */
 #define ONFI_FEATURE_ADDR_READ_RETRY	0x89
 
+/* Vendor-specific array operation mode (Micron) */
+#define ONFI_FEATURE_ADDR_OP_MODE	0x90
+#define ONFI_FEATURE_OP_MODE_ENABLE_ON_DIE_ECC		0x08
+
 /* ONFI subfeature parameters length */
 #define ONFI_SUBFEATURE_PARAM_LEN	4
 
@@ -516,6 +522,8 @@  struct nand_buffers {
 	uint8_t	*ecccalc;
 	uint8_t	*ecccode;
 	uint8_t *databuf;
+	uint8_t *chkbuf;
+	uint8_t *rawbuf;
 };
 
 /**