diff mbox series

[v2,3/3] mtd: rawnand: Support for sequential cache reads

Message ID 20230112093637.987838-4-miquel.raynal@bootlin.com
State Accepted
Headers show
Series mtd: rawnand: Sequential page reads | expand

Commit Message

Miquel Raynal Jan. 12, 2023, 9:36 a.m. UTC
From: JaimeLiao <jaimeliao.tw@gmail.com>

Add support for sequential cache reads for controllers using the generic
core helpers for their fast read/write helpers.

Sequential reads may reduce the overhead when accessing physically
contiguous data by loading in cache the next page while the previous
page gets sent out on the NAND bus.

The ONFI specification provides the following additional commands to
handle sequential cached reads:

* 0x31 - READ CACHE SEQUENTIAL:
  Requires the NAND chip to load the next page into cache while keeping
  the current cache available for host reads.
* 0x3F - READ CACHE END:
  Tells the NAND chip this is the end of the sequential cache read, the
  current cache shall remain accessible for the host but no more
  internal cache loading operation is required.

On the bus, a multi page read operation is currently handled like this:

	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN

Sequential cached reads may instead be achieved with:

	00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
		       3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN

Below are the read speed test results with regular reads and
sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
a NAND chip characterized with the following timings:
* tR: 20 µs
* tRCBSY: 5 µs
* tRR: 20 ns
and the following geometry:
* device size: 2 MiB
* eraseblock size: 128 kiB
* page size: 2 kiB

============= Normal read @ 33MHz =================
mtd_speedtest: eraseblock read speed is 15633 KiB/s
mtd_speedtest: page read speed is 15515 KiB/s
mtd_speedtest: 2 page read speed is 15398 KiB/s
===================================================

========= Sequential cache read @ 33MHz ===========
mtd_speedtest: eraseblock read speed is 18285 KiB/s
mtd_speedtest: page read speed is 15875 KiB/s
mtd_speedtest: 2 page read speed is 16253 KiB/s
===================================================

We observe an overall speed improvement of about 5% when reading
2 pages, up to 15% when reading an entire block. This is due to the
~14 µs gain on each additional page read (tR - (tRCBSY + tRR)).

Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
---
 drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
 include/linux/mtd/rawnand.h      |   9 +++
 2 files changed, 124 insertions(+), 4 deletions(-)

Comments

liao jaime Jan. 13, 2023, 8:46 a.m. UTC | #1
>
> From: JaimeLiao <jaimeliao.tw@gmail.com>
>
> Add support for sequential cache reads for controllers using the generic
> core helpers for their fast read/write helpers.
>
> Sequential reads may reduce the overhead when accessing physically
> continuous data by loading in cache the next page while the previous
> page gets sent out on the NAND bus.
>
> The ONFI specification provides the following additional commands to
> handle sequential cached reads:
>
> * 0x31 - READ CACHE SEQUENTIAL:
>   Requires the NAND chip to load the next page into cache while keeping
>   the current cache available for host reads.
> * 0x3F - READ CACHE END:
>   Tells the NAND chip this is the end of the sequential cache read, the
>   current cache shall remain accessible for the host but no more
>   internal cache loading operation is required.
>
> On the bus, a multi page read operation is currently handled like this:
>
>         00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
>         00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
>         00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
>
> Sequential cached reads may instead be achieved with:
>
>         00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
>                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
>                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
>                        3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
>
> Below are the read speed test results with regular reads and
> sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
> a NAND chip characterized with the following timings:
> * tR: 20 µs
> * tRCBSY: 5 µs
> * tRR: 20 ns
> and the following geometry:
> * device size: 2 MiB
> * eraseblock size: 128 kiB
> * page size: 2 kiB
>
> ============= Normal read @ 33MHz =================
> mtd_speedtest: eraseblock read speed is 15633 KiB/s
> mtd_speedtest: page read speed is 15515 KiB/s
> mtd_speedtest: 2 page read speed is 15398 KiB/s
> ===================================================
>
> ========= Sequential cache read @ 33MHz ===========
> mtd_speedtest: eraseblock read speed is 18285 KiB/s
> mtd_speedtest: page read speed is 15875 KiB/s
> mtd_speedtest: 2 page read speed is 16253 KiB/s
> ===================================================
>
> We observe an overall speed improvement of about 5% when reading
> 2 pages, up to 15% when reading an entire block. This is due to the
> ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
>
> Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> ---
>  drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
>  include/linux/mtd/rawnand.h      |   9 +++
>  2 files changed, 124 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
> index 34395d5d3a47..0b1fd6bbb36b 100644
> --- a/drivers/mtd/nand/raw/nand_base.c
> +++ b/drivers/mtd/nand/raw/nand_base.c
> @@ -1208,6 +1208,73 @@ static int nand_lp_exec_read_page_op(struct nand_chip *chip, unsigned int page,
>         return nand_exec_op(chip, &op);
>  }
>
> +static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int page,
> +                                         unsigned int offset_in_page, void *buf,
> +                                         unsigned int len, bool check_only)
> +{
> +       const struct nand_interface_config *conf =
> +               nand_get_interface_config(chip);
> +       u8 addrs[5];
> +       struct nand_op_instr start_instrs[] = {
> +               NAND_OP_CMD(NAND_CMD_READ0, 0),
> +               NAND_OP_ADDR(4, addrs, 0),
> +               NAND_OP_CMD(NAND_CMD_READSTART, NAND_COMMON_TIMING_NS(conf, tWB_max)),
> +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max), 0),
> +               NAND_OP_CMD(NAND_CMD_READCACHESEQ, NAND_COMMON_TIMING_NS(conf, tWB_max)),
> +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> +                                NAND_COMMON_TIMING_NS(conf, tRR_min)),
> +               NAND_OP_DATA_IN(len, buf, 0),
> +       };
> +       struct nand_op_instr cont_instrs[] = {
> +               NAND_OP_CMD(page == chip->cont_read.last_page ?
> +                           NAND_CMD_READCACHEEND : NAND_CMD_READCACHESEQ,
> +                           NAND_COMMON_TIMING_NS(conf, tWB_max)),
> +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> +                                NAND_COMMON_TIMING_NS(conf, tRR_min)),
> +               NAND_OP_DATA_IN(len, buf, 0),
> +       };
> +       struct nand_operation start_op = NAND_OPERATION(chip->cur_cs, start_instrs);
> +       struct nand_operation cont_op = NAND_OPERATION(chip->cur_cs, cont_instrs);
> +       int ret;
> +
> +       if (!len) {
> +               start_op.ninstrs--;
> +               cont_op.ninstrs--;
> +       }
> +
> +       ret = nand_fill_column_cycles(chip, addrs, offset_in_page);
> +       if (ret < 0)
> +               return ret;
> +
> +       addrs[2] = page;
> +       addrs[3] = page >> 8;
> +
> +       if (chip->options & NAND_ROW_ADDR_3) {
> +               addrs[4] = page >> 16;
> +               start_instrs[1].ctx.addr.naddrs++;
> +       }
> +
> +       /* Check if cache reads are supported */
> +       if (check_only) {
> +               if (nand_check_op(chip, &start_op) || nand_check_op(chip, &cont_op))
> +                       return -EOPNOTSUPP;
> +
> +               return 0;
> +       }
> +
> +       if (page == chip->cont_read.first_page)
> +               return nand_exec_op(chip, &start_op);
> +       else
> +               return nand_exec_op(chip, &cont_op);
> +}
> +
> +static bool rawnand_cont_read_ongoing(struct nand_chip *chip, unsigned int page)
> +{
> +       return chip->cont_read.ongoing &&
> +               page >= chip->cont_read.first_page &&
> +               page <= chip->cont_read.last_page;
> +}
> +
>  /**
>   * nand_read_page_op - Do a READ PAGE operation
>   * @chip: The NAND chip
> @@ -1233,10 +1300,16 @@ int nand_read_page_op(struct nand_chip *chip, unsigned int page,
>                 return -EINVAL;
>
>         if (nand_has_exec_op(chip)) {
> -               if (mtd->writesize > 512)
> -                       return nand_lp_exec_read_page_op(chip, page,
> -                                                        offset_in_page, buf,
> -                                                        len);
> +               if (mtd->writesize > 512) {
> +                       if (rawnand_cont_read_ongoing(chip, page))
> +                               return nand_lp_exec_cont_read_page_op(chip, page,
> +                                                                     offset_in_page,
> +                                                                     buf, len, false);
> +                       else
> +                               return nand_lp_exec_read_page_op(chip, page,
> +                                                                offset_in_page, buf,
> +                                                                len);
> +               }
>
>                 return nand_sp_exec_read_page_op(chip, page, offset_in_page,
>                                                  buf, len);
> @@ -3353,6 +3426,27 @@ static uint8_t *nand_transfer_oob(struct nand_chip *chip, uint8_t *oob,
>         return NULL;
>  }
>
> +static void rawnand_enable_cont_reads(struct nand_chip *chip, unsigned int page,
> +                                     u32 readlen, int col)
> +{
> +       struct mtd_info *mtd = nand_to_mtd(chip);
> +
> +       if (!chip->controller->supported_op.cont_read)
> +               return;
> +
> +       if ((col && col + readlen < (3 * mtd->writesize)) ||
> +           (!col && readlen < (2 * mtd->writesize))) {
> +               chip->cont_read.ongoing = false;
> +               return;
> +       }
> +
> +       chip->cont_read.ongoing = true;
> +       chip->cont_read.first_page = page;
> +       if (col)
> +               chip->cont_read.first_page++;
> +       chip->cont_read.last_page = page + ((readlen >> chip->page_shift) & chip->pagemask);
> +}
> +
>  /**
>   * nand_setup_read_retry - [INTERN] Set the READ RETRY mode
>   * @chip: NAND chip object
> @@ -3426,6 +3520,8 @@ static int nand_do_read_ops(struct nand_chip *chip, loff_t from,
>         oob = ops->oobbuf;
>         oob_required = oob ? 1 : 0;
>
> +       rawnand_enable_cont_reads(chip, page, readlen, col);
> +
>         while (1) {
>                 struct mtd_ecc_stats ecc_stats = mtd->ecc_stats;
>
> @@ -5009,12 +5105,27 @@ static void rawnand_early_check_supported_ops(struct nand_chip *chip)
>         rawnand_check_data_only_read_support(chip);
>  }
>
> +static void rawnand_check_cont_read_support(struct nand_chip *chip)
> +{
> +       struct mtd_info *mtd = nand_to_mtd(chip);
> +
> +       if (chip->read_retries)
> +               return;
> +
> +       if (!nand_lp_exec_cont_read_page_op(chip, 0, 0, NULL,
> +                                           mtd->writesize, true))
> +               chip->controller->supported_op.cont_read = 1;
> +}
> +
>  static void rawnand_late_check_supported_ops(struct nand_chip *chip)
>  {
>         /* The supported_op fields should not be set by individual drivers */
> +       WARN_ON_ONCE(chip->controller->supported_op.cont_read);
>
>         if (!nand_has_exec_op(chip))
>                 return;
> +
> +       rawnand_check_cont_read_support(chip);
>  }
>
>  /*
> diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
> index 28c5dce782dd..1b0936fe3c6e 100644
> --- a/include/linux/mtd/rawnand.h
> +++ b/include/linux/mtd/rawnand.h
> @@ -67,6 +67,8 @@ struct gpio_desc;
>
>  /* Extended commands for large page devices */
>  #define NAND_CMD_READSTART     0x30
> +#define NAND_CMD_READCACHESEQ  0x31
> +#define NAND_CMD_READCACHEEND  0x3f
>  #define NAND_CMD_RNDOUTSTART   0xE0
>  #define NAND_CMD_CACHEDPROG    0x15
>
> @@ -1099,12 +1101,14 @@ struct nand_controller_ops {
>   * @supported_op.data_only_read: The controller supports reading more data from
>   *                     the bus without restarting an entire read operation nor
>   *                     changing the column.
> + * @supported_op.cont_read: The controller supports sequential cache reads.
>   */
>  struct nand_controller {
>         struct mutex lock;
>         const struct nand_controller_ops *ops;
>         struct {
>                 unsigned int data_only_read: 1;
> +               unsigned int cont_read: 1;
>         } supported_op;
>  };
>
> @@ -1308,6 +1312,11 @@ struct nand_chip {
>         int read_retries;
>         struct nand_secure_region *secure_regions;
>         u8 nr_secure_regions;
> +       struct {
> +               bool ongoing;
> +               unsigned int first_page;
> +               unsigned int last_page;
> +       } cont_read;
>
>         /* Externals */
>         struct nand_controller *controller;
> --
> 2.34.1
>

Tested-by: Liao Jaime <jaimeliao.tw@gmail.com>
Miquel Raynal Jan. 13, 2023, 4:36 p.m. UTC | #2
On Thu, 2023-01-12 at 09:36:37 UTC, Miquel Raynal wrote:
> From: JaimeLiao <jaimeliao.tw@gmail.com>
> 
> Add support for sequential cache reads for controllers using the generic
> core helpers for their fast read/write helpers.
> 
> Sequential reads may reduce the overhead when accessing physically
> continuous data by loading in cache the next page while the previous
> page gets sent out on the NAND bus.
> 
> The ONFI specification provides the following additional commands to
> handle sequential cached reads:
> 
> * 0x31 - READ CACHE SEQUENTIAL:
>   Requires the NAND chip to load the next page into cache while keeping
>   the current cache available for host reads.
> * 0x3F - READ CACHE END:
>   Tells the NAND chip this is the end of the sequential cache read, the
>   current cache shall remain accessible for the host but no more
>   internal cache loading operation is required.
> 
> On the bus, a multi page read operation is currently handled like this:
> 
> 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> 	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> 	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> 
> Sequential cached reads may instead be achieved with:
> 
> 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> 		       3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> 
> Below are the read speed test results with regular reads and
> sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
> a NAND chip characterized with the following timings:
> * tR: 20 µs
> * tRCBSY: 5 µs
> * tRR: 20 ns
> and the following geometry:
> * device size: 2 MiB
> * eraseblock size: 128 kiB
> * page size: 2 kiB
> 
> ============= Normal read @ 33MHz =================
> mtd_speedtest: eraseblock read speed is 15633 KiB/s
> mtd_speedtest: page read speed is 15515 KiB/s
> mtd_speedtest: 2 page read speed is 15398 KiB/s
> ===================================================
> 
> ========= Sequential cache read @ 33MHz ===========
> mtd_speedtest: eraseblock read speed is 18285 KiB/s
> mtd_speedtest: page read speed is 15875 KiB/s
> mtd_speedtest: 2 page read speed is 16253 KiB/s
> ===================================================
> 
> We observe an overall speed improvement of about 5% when reading
> 2 pages, up to 15% when reading an entire block. This is due to the
> ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> 
> Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> Tested-by: Liao Jaime <jaimeliao.tw@gmail.com>

Applied to https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git nand/next.

Miquel
Zhihao Cheng March 3, 2023, 12:26 p.m. UTC | #3
Hi all,

> From: JaimeLiao <jaimeliao.tw@gmail.com>
> 
> Add support for sequential cache reads for controllers using the generic
> core helpers for their fast read/write helpers.
> 
> Sequential reads may reduce the overhead when accessing physically
> continuous data by loading in cache the next page while the previous
> page gets sent out on the NAND bus.
> 
> The ONFI specification provides the following additional commands to
> handle sequential cached reads:
> 
> * 0x31 - READ CACHE SEQUENTIAL:
>    Requires the NAND chip to load the next page into cache while keeping
>    the current cache available for host reads.
> * 0x3F - READ CACHE END:
>    Tells the NAND chip this is the end of the sequential cache read, the
>    current cache shall remain accessible for the host but no more
>    internal cache loading operation is required.
> 
> On the bus, a multi page read operation is currently handled like this:
> 
> 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> 	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> 	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> 
> Sequential cached reads may instead be achieved with:
I found an ECC error problem in the latest kernel (nandsim + ubi):

[   71.097739] [nandsim] error: write_byte: unknown command 0x31
[   71.100871] [nandsim] error: write_byte: unknown command 0x31
[   71.101508] [nandsim] warning: read_buf: unexpected data output 
cycle, current state is STATE_READY
[   71.102475] [nandsim] warning: read_buf: unexpected data output 
cycle, current state is STATE_READY
[   71.103427] ecc_sw_hamming_correct: uncorrectable ECC error
[   71.104018] ubi0 warning: ubi_io_read [ubi]: error -74 (ECC error) 
while reading 4096 bytes from PEB 180:2048, read only 4096 bytes, retry
[   71.105272] [nandsim] error: write_byte: unknown command 0x31
[   71.105901] [nandsim] error: write_byte: unknown command 0x31
[   71.106513] [nandsim] warning: read_buf: unexpected data output 
cycle, current state is STATE_READY
[   71.107392] [nandsim] warning: read_buf: unexpected data output 
cycle, current state is STATE_READY
[   71.108277] ecc_sw_hamming_correct: uncorrectable ECC error
[   71.108885] ubi0 warning: ubi_io_read [ubi]: error -74 (ECC error) 
while reading 4096 bytes from PEB 180:2048, read only 4096 bytes, retry
[   71.110230] [nandsim] error: write_byte: unknown command 0x31
[   71.110817] [nandsim] error: write_byte: unknown command 0x31
[   71.111433] [nandsim] warning: read_buf: unexpected data output 
cycle, current state is STATE_READY
[   71.112356] [nandsim] warning: read_buf: unexpected data output 
cycle, current state is STATE_READY
[   71.113232] ecc_sw_hamming_correct: uncorrectable ECC error

reproducer:
#!/bin/bash
set -e

TMP=/root/temp

mtd=/dev/mtd0
ubi=/dev/ubi0

ID="0xec,0xa1,0x00,0x15" # 128M 128KB 2KB
modprobe nandsim id_bytes=$ID
flash_eraseall /dev/mtd0
modprobe ubi mtd="0,0" fm_autoconvert
ubimkvol -N vol_a -m -n 0 /dev/ubi0
modprobe ubifs
mount -t ubifs /dev/ubi0_0 $TMP

After reverting 003fe4b9545b83cc ("mtd: rawnand: Support for sequential
cache reads"), the MTD device works normally again.
Måns Rullgård June 22, 2023, 2:59 p.m. UTC | #4
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> From: JaimeLiao <jaimeliao.tw@gmail.com>
>
> Add support for sequential cache reads for controllers using the generic
> core helpers for their fast read/write helpers.
>
> Sequential reads may reduce the overhead when accessing physically
> continuous data by loading in cache the next page while the previous
> page gets sent out on the NAND bus.
>
> The ONFI specification provides the following additional commands to
> handle sequential cached reads:
>
> * 0x31 - READ CACHE SEQUENTIAL:
>   Requires the NAND chip to load the next page into cache while keeping
>   the current cache available for host reads.
> * 0x3F - READ CACHE END:
>   Tells the NAND chip this is the end of the sequential cache read, the
>   current cache shall remain accessible for the host but no more
>   internal cache loading operation is required.
>
> On the bus, a multi page read operation is currently handled like this:
>
> 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> 	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> 	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
>
> Sequential cached reads may instead be achieved with:
>
> 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> 		       3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
>
> Below are the read speed test results with regular reads and
> sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
> a NAND chip characterized with the following timings:
> * tR: 20 µs
> * tRCBSY: 5 µs
> * tRR: 20 ns
> and the following geometry:
> * device size: 2 MiB
> * eraseblock size: 128 kiB
> * page size: 2 kiB
>
> ============= Normal read @ 33MHz =================
> mtd_speedtest: eraseblock read speed is 15633 KiB/s
> mtd_speedtest: page read speed is 15515 KiB/s
> mtd_speedtest: 2 page read speed is 15398 KiB/s
> ===================================================
>
> ========= Sequential cache read @ 33MHz ===========
> mtd_speedtest: eraseblock read speed is 18285 KiB/s
> mtd_speedtest: page read speed is 15875 KiB/s
> mtd_speedtest: 2 page read speed is 16253 KiB/s
> ===================================================
>
> We observe an overall speed improvement of about 5% when reading
> 2 pages, up to 15% when reading an entire block. This is due to the
> ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
>
> Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> ---
>  drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
>  include/linux/mtd/rawnand.h      |   9 +++
>  2 files changed, 124 insertions(+), 4 deletions(-)

This change broke something on a TI AM3517 based system.  What I'm
noticing is that the u-boot fw_setenv tool is failing due to the
MEMGETBADBLOCK ioctl reporting some blocks as bad when they are not.
Everything else is, somehow, working fine.  Reverting this commit fixes
it, though I don't know why.  I'm seeing the same behaviour on multiple
devices, so I doubt there is a problem with the flash memory.

Is there anything I can test to get more information?
Miquel Raynal June 22, 2023, 9:12 p.m. UTC | #5
Hi Måns,

mans@mansr.com wrote on Thu, 22 Jun 2023 15:59:25 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > From: JaimeLiao <jaimeliao.tw@gmail.com>
> >
> > Add support for sequential cache reads for controllers using the generic
> > core helpers for their fast read/write helpers.
> >
> > Sequential reads may reduce the overhead when accessing physically
> > continuous data by loading in cache the next page while the previous
> > page gets sent out on the NAND bus.
> >
> > The ONFI specification provides the following additional commands to
> > handle sequential cached reads:
> >
> > * 0x31 - READ CACHE SEQUENTIAL:
> >   Requires the NAND chip to load the next page into cache while keeping
> >   the current cache available for host reads.
> > * 0x3F - READ CACHE END:
> >   Tells the NAND chip this is the end of the sequential cache read, the
> >   current cache shall remain accessible for the host but no more
> >   internal cache loading operation is required.
> >
> > On the bus, a multi page read operation is currently handled like this:
> >
> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> > 	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> > 	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> >
> > Sequential cached reads may instead be achieved with:
> >
> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> > 		       3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> >
> > Below are the read speed test results with regular reads and
> > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
> > a NAND chip characterized with the following timings:
> > * tR: 20 µs
> > * tRCBSY: 5 µs
> > * tRR: 20 ns
> > and the following geometry:
> > * device size: 2 MiB
> > * eraseblock size: 128 kiB
> > * page size: 2 kiB
> >
> > ============= Normal read @ 33MHz =================
> > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> > mtd_speedtest: page read speed is 15515 KiB/s
> > mtd_speedtest: 2 page read speed is 15398 KiB/s
> > ===================================================
> >
> > ========= Sequential cache read @ 33MHz ===========
> > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> > mtd_speedtest: page read speed is 15875 KiB/s
> > mtd_speedtest: 2 page read speed is 16253 KiB/s
> > ===================================================
> >
> > We observe an overall speed improvement of about 5% when reading
> > 2 pages, up to 15% when reading an entire block. This is due to the
> > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> >
> > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> > ---
> >  drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
> >  include/linux/mtd/rawnand.h      |   9 +++
> >  2 files changed, 124 insertions(+), 4 deletions(-)  
> 
> This change broke something on a TI AM3517 based system.  What I'm
> noticing is that the u-boot fw_setenv tool is failing due to the
> MEMGETBADBLOCK ioctl reporting some blocks as bad when they are not.
> Everything else is, somehow, working fine.  Reverting this commit fixes
> it, though I don't know why.  I'm seeing the same behaviour on multiple
> devices, so I doubt there is a problem with the flash memory.
> 
> Is there anything I can test to get more information?
> 

May I know what NAND chip you are using?

Thanks,
Miquèl
Måns Rullgård June 23, 2023, 11:27 a.m. UTC | #6
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> Hi Måns,
>
> mans@mansr.com wrote on Thu, 22 Jun 2023 15:59:25 +0100:
>
>> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> 
>> > From: JaimeLiao <jaimeliao.tw@gmail.com>
>> >
>> > Add support for sequential cache reads for controllers using the generic
>> > core helpers for their fast read/write helpers.
>> >
>> > Sequential reads may reduce the overhead when accessing physically
>> > continuous data by loading in cache the next page while the previous
>> > page gets sent out on the NAND bus.
>> >
>> > The ONFI specification provides the following additional commands to
>> > handle sequential cached reads:
>> >
>> > * 0x31 - READ CACHE SEQUENTIAL:
>> >   Requires the NAND chip to load the next page into cache while keeping
>> >   the current cache available for host reads.
>> > * 0x3F - READ CACHE END:
>> >   Tells the NAND chip this is the end of the sequential cache read, the
>> >   current cache shall remain accessible for the host but no more
>> >   internal cache loading operation is required.
>> >
>> > On the bus, a multi page read operation is currently handled like this:
>> >
>> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
>> > 	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
>> > 	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
>> >
>> > Sequential cached reads may instead be achieved with:
>> >
>> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
>> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
>> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
>> > 		       3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
>> >
>> > Below are the read speed test results with regular reads and
>> > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
>> > a NAND chip characterized with the following timings:
>> > * tR: 20 µs
>> > * tRCBSY: 5 µs
>> > * tRR: 20 ns
>> > and the following geometry:
>> > * device size: 2 MiB
>> > * eraseblock size: 128 kiB
>> > * page size: 2 kiB
>> >
>> > ============= Normal read @ 33MHz =================
>> > mtd_speedtest: eraseblock read speed is 15633 KiB/s
>> > mtd_speedtest: page read speed is 15515 KiB/s
>> > mtd_speedtest: 2 page read speed is 15398 KiB/s
>> > ===================================================
>> >
>> > ========= Sequential cache read @ 33MHz ===========
>> > mtd_speedtest: eraseblock read speed is 18285 KiB/s
>> > mtd_speedtest: page read speed is 15875 KiB/s
>> > mtd_speedtest: 2 page read speed is 16253 KiB/s
>> > ===================================================
>> >
>> > We observe an overall speed improvement of about 5% when reading
>> > 2 pages, up to 15% when reading an entire block. This is due to the
>> > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
>> >
>> > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
>> > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
>> > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
>> > ---
>> >  drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
>> >  include/linux/mtd/rawnand.h      |   9 +++
>> >  2 files changed, 124 insertions(+), 4 deletions(-)  
>> 
>> This change broke something on a TI AM3517 based system.  What I'm
>> noticing is that the u-boot fw_setenv tool is failing due to the
>> MEMGETBADBLOCK ioctl reporting some blocks as bad when they are not.
>> Everything else is, somehow, working fine.  Reverting this commit fixes
>> it, though I don't know why.  I'm seeing the same behaviour on multiple
>> devices, so I doubt there is a problem with the flash memory.
>> 
>> Is there anything I can test to get more information?
>> 
>
> May I know what NAND chip you are using?

It's a Micron MT29F4G16ABBDAH4-IT:D.  From the kernel logs:

nand: device found, Manufacturer ID: 0x2c, Chip ID: 0xbc
nand: Micron MT29F4G16ABBDAH4
nand: 512 MiB, SLC, erase size: 128 KiB, page size: 2048, OOB size: 64
Miquel Raynal June 23, 2023, 2:07 p.m. UTC | #7
Hi Måns,

mans@mansr.com wrote on Fri, 23 Jun 2023 12:27:54 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > Hi Måns,
> >
> > mans@mansr.com wrote on Thu, 22 Jun 2023 15:59:25 +0100:
> >  
> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >>   
> >> > From: JaimeLiao <jaimeliao.tw@gmail.com>
> >> >
> >> > Add support for sequential cache reads for controllers using the generic
> >> > core helpers for their fast read/write helpers.
> >> >
> >> > Sequential reads may reduce the overhead when accessing physically
> >> > continuous data by loading in cache the next page while the previous
> >> > page gets sent out on the NAND bus.
> >> >
> >> > The ONFI specification provides the following additional commands to
> >> > handle sequential cached reads:
> >> >
> >> > * 0x31 - READ CACHE SEQUENTIAL:
> >> >   Requires the NAND chip to load the next page into cache while keeping
> >> >   the current cache available for host reads.
> >> > * 0x3F - READ CACHE END:
> >> >   Tells the NAND chip this is the end of the sequential cache read, the
> >> >   current cache shall remain accessible for the host but no more
> >> >   internal cache loading operation is required.
> >> >
> >> > On the bus, a multi page read operation is currently handled like this:
> >> >
> >> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> >> > 	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> >> > 	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> >> >
> >> > Sequential cached reads may instead be achieved with:
> >> >
> >> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> >> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> >> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> >> > 		       3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> >> >
> >> > Below are the read speed test results with regular reads and
> >> > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
> >> > a NAND chip characterized with the following timings:
> >> > * tR: 20 µs
> >> > * tRCBSY: 5 µs
> >> > * tRR: 20 ns
> >> > and the following geometry:
> >> > * device size: 2 MiB
> >> > * eraseblock size: 128 kiB
> >> > * page size: 2 kiB
> >> >
> >> > ============= Normal read @ 33MHz =================
> >> > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> >> > mtd_speedtest: page read speed is 15515 KiB/s
> >> > mtd_speedtest: 2 page read speed is 15398 KiB/s
> >> > ===================================================
> >> >
> >> > ========= Sequential cache read @ 33MHz ===========
> >> > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> >> > mtd_speedtest: page read speed is 15875 KiB/s
> >> > mtd_speedtest: 2 page read speed is 16253 KiB/s
> >> > ===================================================
> >> >
> >> > We observe an overall speed improvement of about 5% when reading
> >> > 2 pages, up to 15% when reading an entire block. This is due to the
> >> > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> >> >
> >> > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> >> > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> >> > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> >> > ---
> >> >  drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
> >> >  include/linux/mtd/rawnand.h      |   9 +++
> >> >  2 files changed, 124 insertions(+), 4 deletions(-)    
> >> 
> >> This change broke something on a TI AM3517 based system.  What I'm
> >> noticing is that the u-boot fw_setenv tool is failing due to the
> >> MEMGETBADBLOCK ioctl reporting some blocks as bad when they are not.
> >> Everything else is, somehow, working fine.  Reverting this commit fixes
> >> it, though I don't know why.  I'm seeing the same behaviour on multiple
> >> devices, so I doubt there is a problem with the flash memory.
> >> 
> >> Is there anything I can test to get more information?
> >>   
> >
> > May I know what NAND chip you are using?  
> 
> It's a Micron MT29F4G16ABBDAH4-IT:D.  From the kernel logs:
> 
> nand: device found, Manufacturer ID: 0x2c, Chip ID: 0xbc
> nand: Micron MT29F4G16ABBDAH4
> nand: 512 MiB, SLC, erase size: 128 KiB, page size: 2048, OOB size: 64

There is definitely something wrong with Micron's handling of the
sequential reads. Can you make a setup on your side with one of these
chips and try to reproduce?

I will have to discard Micron's chips if we don't find a solution very
soon, too bad, it is a nice performance improvement -when it works-.

Thanks,
Miquèl
Bean Huo June 26, 2023, 9:43 a.m. UTC | #8
I saw the NAND related to the issue with the NAND ID reported by chengzhihao1@huawei.com

> ID="0xec,0xa1,0x00,0x15" # 128M 128KB 2KB

This is a Samsung NAND flash manufacturer ID.


Hi, Jaimeliao
You said you tested this patch, would you share what NAND and what SOC are you using?

Kind regards,
Bean

> -----Original Message-----
> From: Miquel Raynal <miquel.raynal@bootlin.com>
> Sent: Friday, June 23, 2023 4:08 PM
> To: Måns Rullgård <mans@mansr.com>
> Cc: Richard Weinberger <richard@nod.at>; Vignesh Raghavendra
> <vigneshr@ti.com>; Tudor Ambarus <Tudor.Ambarus@microchip.com>; Pratyush
> Yadav <pratyush@kernel.org>; Michael Walle <michael@walle.cc>; linux-
> mtd@lists.infradead.org; Julien Su <juliensu@mxic.com.tw>; Jaime Liao
> <jaimeliao@mxic.com.tw>; Alvin Zhou <alvinzhou@mxic.com.tw>; Thomas
> Petazzoni <thomas.petazzoni@bootlin.com>; JaimeLiao <jaimeliao.tw@gmail.com>;
> Bean Huo <beanhuo@micron.com>
> Subject: [EXT] Re: [PATCH v2 3/3] mtd: rawnand: Support for sequential cache reads
> 
> CAUTION: EXTERNAL EMAIL. Do not click links or open attachments unless you
> recognize the sender and were expecting this message.
> 
> 
> Hi Bean,
> 
> mans@mansr.com wrote on Fri, 23 Jun 2023 12:27:54 +0100:
> 
> > Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >
> > > Hi Måns,
> > >
> > > mans@mansr.com wrote on Thu, 22 Jun 2023 15:59:25 +0100:
> > >
> > >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> > >>
> > >> > From: JaimeLiao <jaimeliao.tw@gmail.com>
> > >> >
> > >> > Add support for sequential cache reads for controllers using the
> > >> > generic core helpers for their fast read/write helpers.
> > >> >
> > >> > Sequential reads may reduce the overhead when accessing
> > >> > physically continuous data by loading in cache the next page
> > >> > while the previous page gets sent out on the NAND bus.
> > >> >
> > >> > The ONFI specification provides the following additional commands
> > >> > to handle sequential cached reads:
> > >> >
> > >> > * 0x31 - READ CACHE SEQUENTIAL:
> > >> >   Requires the NAND chip to load the next page into cache while keeping
> > >> >   the current cache available for host reads.
> > >> > * 0x3F - READ CACHE END:
> > >> >   Tells the NAND chip this is the end of the sequential cache read, the
> > >> >   current cache shall remain accessible for the host but no more
> > >> >   internal cache loading operation is required.
> > >> >
> > >> > On the bus, a multi page read operation is currently handled like this:
> > >> >
> > >> >  00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> > >> >  00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> > >> >  00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> > >> >
> > >> > Sequential cached reads may instead be achieved with:
> > >> >
> > >> >  00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> > >> >                 31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> > >> >                 31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> > >> >                 3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> > >> >
> > >> > Below are the read speed test results with regular reads and
> > >> > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping
> > >> > mode with a NAND chip characterized with the following timings:
> > >> > * tR: 20 µs
> > >> > * tRCBSY: 5 µs
> > >> > * tRR: 20 ns
> > >> > and the following geometry:
> > >> > * device size: 2 MiB
> > >> > * eraseblock size: 128 kiB
> > >> > * page size: 2 kiB
> > >> >
> > >> > ============= Normal read @ 33MHz =================
> > >> > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> > >> > mtd_speedtest: page read speed is 15515 KiB/s
> > >> > mtd_speedtest: 2 page read speed is 15398 KiB/s
> > >> > ===================================================
> > >> >
> > >> > ========= Sequential cache read @ 33MHz ===========
> > >> > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> > >> > mtd_speedtest: page read speed is 15875 KiB/s
> > >> > mtd_speedtest: 2 page read speed is 16253 KiB/s
> > >> > ===================================================
> > >> >
> > >> > We observe an overall speed improvement of about 5% when reading
> > >> > 2 pages, up to 15% when reading an entire block. This is due to
> > >> > the ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> > >> >
> > >> > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > >> > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > >> > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> > >> > ---
> > >> >  drivers/mtd/nand/raw/nand_base.c | 119
> +++++++++++++++++++++++++++++--
> > >> >  include/linux/mtd/rawnand.h      |   9 +++
> > >> >  2 files changed, 124 insertions(+), 4 deletions(-)
> > >>
> > >> This change broke something on a TI AM3517 based system.  What I'm
> > >> noticing is that the u-boot fw_setenv tool is failing due to the
> > >> MEMGETBADBLOCK ioctl reporting some blocks as bad when they are not.
> > >> Everything else is, somehow, working fine.  Reverting this commit
> > >> fixes it, though I don't know why.  I'm seeing the same behaviour
> > >> on multiple devices, so I doubt there is a problem with the flash memory.
> > >>
> > >> Is there anything I can test to get more information?
> > >>
> > >
> > > May I know what NAND chip you are using?
> >
> > It's a Micron MT29F4G16ABBDAH4-IT:D.  From the kernel logs:
> >
> > nand: device found, Manufacturer ID: 0x2c, Chip ID: 0xbc
> > nand: Micron MT29F4G16ABBDAH4
> > nand: 512 MiB, SLC, erase size: 128 KiB, page size: 2048, OOB size: 64
> 
> There is definitely something wrong with Micron's handling of the sequential reads.
> Can you make a setup on your side with one of these chips and try to reproduce?
> 
> I will have to discard Micron's chips if we don't find a solution very soon, too bad, it
> is a nice performance improvement -when it works-.
> 
> Thanks,
> Miquèl
Miquel Raynal July 16, 2023, 3:49 p.m. UTC | #9
Hello all,

So here is a summary of the situation:
- we have two bug reports regarding the use of sequential page reads
- both are on TI OMAP platforms: AM33XX and AM3517. I believe both are
  using the same omap2.c driver
- they use a Micron and a Samsung NAND chip

All these information gives me the hint that it is related to the
controller driver which does something silly during the exec_op phase.

Alexander and Måns, can you please tell me:
- Are you using a gpio for the waitrdy thing or do you leverage
  nand_soft_waitrdy()? If you are using the gpio, can you both try with
  the soft implementation and see if it changes something?
- Are you using any POLL or DMA prefetch mode? Can you please force the
  default in and out helpers by using only omap_nand_data_in() and
  omap_nand_data_out() to see if it changes something?

I believe there is something wrong in the timings, while properly
implemented in theory there might be some cases where we miss a barrier
or something like that. I would like to try the following two hacks,
and see if we can find what is the timing that is not observed, despite
the lack of probing. The first one is a real hack, the second one might
actually look like a real fix. Please let me know, both of you, if you
see different behaviors.

*** HACK #1 ***

--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2113,6 +2113,9 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
        case NAND_OP_CMD_INSTR:
                iowrite8(instr->ctx.cmd.opcode,
                         info->reg.gpmc_nand_command);
+               if (instr->ctx.cmd.opcode == NAND_CMD_READCACHESEQ ||
+                   instr->ctx.cmd.opcode == NAND_CMD_READCACHEEND)
+                       udelay(50);
                break;
 
        case NAND_OP_ADDR_INSTR:

*** HACK #2 ***

--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2143,8 +2146,10 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
                break;
        }
 
-       if (instr->delay_ns)
+       if (instr->delay_ns) {
+               mb();
                ndelay(instr->delay_ns);
+       }
 
        return 0;
 }

Thanks a lot!
Miquèl

mans@mansr.com wrote on Thu, 22 Jun 2023 15:59:25 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > From: JaimeLiao <jaimeliao.tw@gmail.com>
> >
> > Add support for sequential cache reads for controllers using the generic
> > core helpers for their fast read/write helpers.
> >
> > Sequential reads may reduce the overhead when accessing physically
> > continuous data by loading in cache the next page while the previous
> > page gets sent out on the NAND bus.
> >
> > The ONFI specification provides the following additional commands to
> > handle sequential cached reads:
> >
> > * 0x31 - READ CACHE SEQUENTIAL:
> >   Requires the NAND chip to load the next page into cache while keeping
> >   the current cache available for host reads.
> > * 0x3F - READ CACHE END:
> >   Tells the NAND chip this is the end of the sequential cache read, the
> >   current cache shall remain accessible for the host but no more
> >   internal cache loading operation is required.
> >
> > On the bus, a multi page read operation is currently handled like this:
> >
> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> > 	00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> > 	00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> >
> > Sequential cached reads may instead be achieved with:
> >
> > 	00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> > 		       31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> > 		       3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> >
> > Below are the read speed test results with regular reads and
> > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
> > a NAND chip characterized with the following timings:
> > * tR: 20 µs
> > * tRCBSY: 5 µs
> > * tRR: 20 ns
> > and the following geometry:
> > * device size: 2 MiB
> > * eraseblock size: 128 kiB
> > * page size: 2 kiB
> >
> > ============= Normal read @ 33MHz =================
> > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> > mtd_speedtest: page read speed is 15515 KiB/s
> > mtd_speedtest: 2 page read speed is 15398 KiB/s
> > ===================================================
> >
> > ========= Sequential cache read @ 33MHz ===========
> > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> > mtd_speedtest: page read speed is 15875 KiB/s
> > mtd_speedtest: 2 page read speed is 16253 KiB/s
> > ===================================================
> >
> > We observe an overall speed improvement of about 5% when reading
> > 2 pages, up to 15% when reading an entire block. This is due to the
> > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> >
> > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> > ---
> >  drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
> >  include/linux/mtd/rawnand.h      |   9 +++
> >  2 files changed, 124 insertions(+), 4 deletions(-)  
> 
> This change broke something on a TI AM3517 based system.  What I'm
> noticing is that the u-boot fw_setenv tool is failing due to the
> MEMGETBADBLOCK ioctl reporting some blocks as bad when they are not.
> Everything else is, somehow, working fine.  Reverting this commit fixes
> it, though I don't know why.  I'm seeing the same behaviour on multiple
> devices, so I doubt there is a problem with the flash memory.
> 
> Is there anything I can test to get more information?
>
Måns Rullgård July 16, 2023, 5:46 p.m. UTC | #10
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> Hello all,
>
> So here is a summary of the situation:
> - we have two bug reports regarding the use of sequential page reads
> - both are on TI OMAP platforms: AM33XX and AM3517. I believe both are
>   using the same omap2.c driver
> - they use a Micron and a Samsung NAND chip
>
> All these information gives me the hint that it is related to the
> controller driver which does something silly during the exec_op phase.
>
> Alexander and Måns, can you please tell me:
> - Are you using a gpio for the waitrdy thing or do you leverage
>   nand_soft_waitrdy()? If you are using the gpio, can you both try with
>   the soft implementation and see if it changes something?

There's no gpio specified in the devicetree, so I guess it must be using
nand_soft_waitrdy().

> - Are you using any POLL or DMA prefetch mode? Can you please force the
>   default in and out helpers by using only omap_nand_data_in() and
>   omap_nand_data_out() to see if it changes something?

It was using the default PREFETCH_POLLED mode.  Switching it to POLLED
(and thus omap_nand_data_in/out()) doesn't change anything.

> I believe there is something wrong in the timings, while properly
> implemented in theory there might be some cases where we miss a barrier
> or something like that. I would like to try the following two hacks,
> and see if we can find what is the timing that is not observed, despite
> the lack of probing. The first one is a real hack, the second one might
> actually look like a real fix. Please let me know, both of you, if you
> see different behaviors.
>
> *** HACK #1 ***
>
> --- a/drivers/mtd/nand/raw/omap2.c
> +++ b/drivers/mtd/nand/raw/omap2.c
> @@ -2113,6 +2113,9 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
>         case NAND_OP_CMD_INSTR:
>                 iowrite8(instr->ctx.cmd.opcode,
>                          info->reg.gpmc_nand_command);
> +               if (instr->ctx.cmd.opcode == NAND_CMD_READCACHESEQ ||
> +                   instr->ctx.cmd.opcode == NAND_CMD_READCACHEEND)
> +                       udelay(50);
>                 break;
>
>         case NAND_OP_ADDR_INSTR:
>
> *** HACK #2 ***
>
> --- a/drivers/mtd/nand/raw/omap2.c
> +++ b/drivers/mtd/nand/raw/omap2.c
> @@ -2143,8 +2146,10 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
>                 break;
>         }
>
> -       if (instr->delay_ns)
> +       if (instr->delay_ns) {
> +               mb();
>                 ndelay(instr->delay_ns);
> +       }
>
>         return 0;
>  }

Neither of these help.
Alexander Shiyan July 17, 2023, 5:33 a.m. UTC | #11
Hello.

We are using GPIO for waitrdy. However, even when switching to soft
implementation,
the system still behaves incorrectly.
We are using prefetch-dma mode for xfer. Changing to the default implementation
does not result in any improvements.

Both patches don't help :(

вс, 16 июл. 2023 г. в 18:49, Miquel Raynal <miquel.raynal@bootlin.com>:
>
> Hello all,
>
> So here is a summary of the situation:
> - we have two bug reports regarding the use of sequential page reads
> - both are on TI OMAP platforms: AM33XX and AM3517. I believe both are
>   using the same omap2.c driver
> - they use a Micron and a Samsung NAND chip
>
> All these information gives me the hint that it is related to the
> controller driver which does something silly during the exec_op phase.
>
> Alexander and Måns, can you please tell me:
> - Are you using a gpio for the waitrdy thing or do you leverage
>   nand_soft_waitrdy()? If you are using the gpio, can you both try with
>   the soft implementation and see if it changes something?
> - Are you using any POLL or DMA prefetch mode? Can you please force the
>   default in and out helpers by using only omap_nand_data_in() and
>   omap_nand_data_out() to see if it changes something?
>
> I believe there is something wrong in the timings, while properly
> implemented in theory there might be some cases where we miss a barrier
> or something like that. I would like to try the following two hacks,
> and see if we can find what is the timing that is not observed, despite
> the lack of probing. The first one is a real hack, the second one might
> actually look like a real fix. Please let me know, both of you, if you
> see different behaviors.
>
> *** HACK #1 ***
>
> --- a/drivers/mtd/nand/raw/omap2.c
> +++ b/drivers/mtd/nand/raw/omap2.c
> @@ -2113,6 +2113,9 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
>         case NAND_OP_CMD_INSTR:
>                 iowrite8(instr->ctx.cmd.opcode,
>                          info->reg.gpmc_nand_command);
> +               if (instr->ctx.cmd.opcode == NAND_CMD_READCACHESEQ ||
> +                   instr->ctx.cmd.opcode == NAND_CMD_READCACHEEND)
> +                       udelay(50);
>                 break;
>
>         case NAND_OP_ADDR_INSTR:
>
> *** HACK #2 ***
>
> --- a/drivers/mtd/nand/raw/omap2.c
> +++ b/drivers/mtd/nand/raw/omap2.c
> @@ -2143,8 +2146,10 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
>                 break;
>         }
>
> -       if (instr->delay_ns)
> +       if (instr->delay_ns) {
> +               mb();
>                 ndelay(instr->delay_ns);
> +       }
>
>         return 0;
>  }
>
> Thanks a lot!
> Miquèl
>
> mans@mansr.com wrote on Thu, 22 Jun 2023 15:59:25 +0100:
>
> > Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >
> > > From: JaimeLiao <jaimeliao.tw@gmail.com>
> > >
> > > Add support for sequential cache reads for controllers using the generic
> > > core helpers for their fast read/write helpers.
> > >
> > > Sequential reads may reduce the overhead when accessing physically
> > > continuous data by loading in cache the next page while the previous
> > > page gets sent out on the NAND bus.
> > >
> > > The ONFI specification provides the following additional commands to
> > > handle sequential cached reads:
> > >
> > > * 0x31 - READ CACHE SEQUENTIAL:
> > >   Requires the NAND chip to load the next page into cache while keeping
> > >   the current cache available for host reads.
> > > * 0x3F - READ CACHE END:
> > >   Tells the NAND chip this is the end of the sequential cache read, the
> > >   current cache shall remain accessible for the host but no more
> > >   internal cache loading operation is required.
> > >
> > > On the bus, a multi page read operation is currently handled like this:
> > >
> > >     00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> > >     00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> > >     00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> > >
> > > Sequential cached reads may instead be achieved with:
> > >
> > >     00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> > >                    31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> > >                    31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> > >                    3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> > >
> > > Below are the read speed test results with regular reads and
> > > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with
> > > a NAND chip characterized with the following timings:
> > > * tR: 20 µs
> > > * tRCBSY: 5 µs
> > > * tRR: 20 ns
> > > and the following geometry:
> > > * device size: 2 MiB
> > > * eraseblock size: 128 kiB
> > > * page size: 2 kiB
> > >
> > > ============= Normal read @ 33MHz =================
> > > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> > > mtd_speedtest: page read speed is 15515 KiB/s
> > > mtd_speedtest: 2 page read speed is 15398 KiB/s
> > > ===================================================
> > >
> > > ========= Sequential cache read @ 33MHz ===========
> > > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> > > mtd_speedtest: page read speed is 15875 KiB/s
> > > mtd_speedtest: 2 page read speed is 16253 KiB/s
> > > ===================================================
> > >
> > > We observe an overall speed improvement of about 5% when reading
> > > 2 pages, up to 15% when reading an entire block. This is due to the
> > > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> > >
> > > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>
> > > ---
> > >  drivers/mtd/nand/raw/nand_base.c | 119 +++++++++++++++++++++++++++++--
> > >  include/linux/mtd/rawnand.h      |   9 +++
> > >  2 files changed, 124 insertions(+), 4 deletions(-)
> >
> > This change broke something on a TI AM3517 based system.  What I'm
> > noticing is that the u-boot fw_setenv tool is failing due to the
> > MEMGETBADBLOCK ioctl reporting some blocks as bad when they are not.
> > Everything else is, somehow, working fine.  Reverting this commit fixes
> > it, though I don't know why.  I'm seeing the same behaviour on multiple
> > devices, so I doubt there is a problem with the flash memory.
> >
> > Is there anything I can test to get more information?
> >
Miquel Raynal July 17, 2023, 7:19 a.m. UTC | #12
Hi Måns & Alexander,

mans@mansr.com wrote on Sun, 16 Jul 2023 18:46:26 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > Hello all,
> >
> > So here is a summary of the situation:
> > - we have two bug reports regarding the use of sequential page reads
> > - both are on TI OMAP platforms: AM33XX and AM3517. I believe both are
> >   using the same omap2.c driver
> > - they use a Micron and a Samsung NAND chip
> >
> > All these information gives me the hint that it is related to the
> > controller driver which does something silly during the exec_op phase.
> >
> > Alexander and Måns, can you please tell me:
> > - Are you using a gpio for the waitrdy thing or do you leverage
> >   nand_soft_waitrdy()? If you are using the gpio, can you both try with
> >   the soft implementation and see if it changes something?  
> 
> There's no gpio specified in the devicetree, so I guess it must be using
> nand_soft_waitrdy().
> 
> > - Are you using any POLL or DMA prefetch mode? Can you please force the
> >   default in and out helpers by using only omap_nand_data_in() and
> >   omap_nand_data_out() to see if it changes something?  
> 
> It was using the default PREFETCH_POLLED mode.  Switching it to POLLED
> (and thus omap_nand_data_in/out()) doesn't change anything.
> 
> > I believe there is something wrong in the timings, while properly
> > implemented in theory there might be some cases where we miss a barrier
> > or something like that. I would like to try the following two hacks,
> > and see if we can find what is the timing that is not observed, despite
> > the lack of probing. The first one is a real hack, the second one might
> > actually look like a real fix. Please let me know, both of you, if you
> > see different behaviors.
> >
> > *** HACK #1 ***
> >
> > --- a/drivers/mtd/nand/raw/omap2.c
> > +++ b/drivers/mtd/nand/raw/omap2.c
> > @@ -2113,6 +2113,9 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
> >         case NAND_OP_CMD_INSTR:
> >                 iowrite8(instr->ctx.cmd.opcode,
> >                          info->reg.gpmc_nand_command);
> > +               if (instr->ctx.cmd.opcode == NAND_CMD_READCACHESEQ ||
> > +                   instr->ctx.cmd.opcode == NAND_CMD_READCACHEEND)
> > +                       udelay(50);
> >                 break;
> >
> >         case NAND_OP_ADDR_INSTR:
> >
> > *** HACK #2 ***
> >
> > --- a/drivers/mtd/nand/raw/omap2.c
> > +++ b/drivers/mtd/nand/raw/omap2.c
> > @@ -2143,8 +2146,10 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
> >                 break;
> >         }
> >
> > -       if (instr->delay_ns)
> > +       if (instr->delay_ns) {
> > +               mb();
> >                 ndelay(instr->delay_ns);
> > +       }
> >
> >         return 0;
> >  }  
> 
> Neither of these help.

I am also pasting Alexander's answer here:

> We are using GPIO for waitrdy. However, even when switching to soft
> implementation,
> the system still behaves incorrectly.
> We are using prefetch-dma mode for xfer. Changing to the default implementation
> does not result in any improvements.
> 
> Both patches don't help :(

Thanks a lot to both of you for testing.

So, I should have done that earlier but, could you please slow the
whole operation down, just to see if there is something wrong with the
timings or if we should look in another direction.

Maybe you could add a boolean to flag if the last CMD was a
READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
true, please get the jiffies before and after each waitrdy and
delay_ns. Finally, please print the expected delay and the actual one
and compare to see if something was too fast compared to what we
expected.

In a second test, you could simply add a udelay(50); at the end of
omap_nand_exec_instr().

Thanks,
Miquèl
Måns Rullgård July 17, 2023, 1:11 p.m. UTC | #13
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> So, I should have done that earlier but, could you please slow the
> whole operation down, just to see if there is something wrong with the
> timings or if we should look in another direction.
>
> Maybe you could add a boolean to flag if the last CMD was a
> READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
> true, please get the jiffies before and after each waitrdy and
> delay_ns. Finally, please print the expected delay and the actual one
> and compare to see if something was too fast compared to what we
> expected.

Between which points exactly should the delay be measured?  Also, there
is no command called READCACHESTART.  Did you mean READSTART or
something else?

> In a second test, you could simply add a udelay(50); at the end of
> omap_nand_exec_instr().

That didn't help.
Miquel Raynal July 17, 2023, 4:36 p.m. UTC | #14
Hi Måns,

mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > So, I should have done that earlier but, could you please slow the
> > whole operation down, just to see if there is something wrong with the
> > timings or if we should look in another direction.
> >
> > Maybe you could add a boolean to flag if the last CMD was a
> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
> > true, please get the jiffies before and after each waitrdy and
> > delay_ns. Finally, please print the expected delay and the actual one
> > and compare to see if something was too fast compared to what we
> > expected.  
> 
> Between which points exactly should the delay be measured?  Also, there
> is no command called READCACHESTART.  Did you mean READSTART or
> something else?

Yeah, whatever command is specific to sequential cache reads:
https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228

> > In a second test, you could simply add a udelay(50); at the end of
> > omap_nand_exec_instr().  
> 
> That didn't help.

Really? That's disappointing (thanks a lot for your quick feedback).

Let's see what the measurements will return.

Thanks,
Miquèl
Måns Rullgård July 18, 2023, 2:03 p.m. UTC | #15
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> Hi Måns,
>
> mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:
>
>> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> 
>> > So, I should have done that earlier but, could you please slow the
>> > whole operation down, just to see if there is something wrong with the
>> > timings or if we should look in another direction.
>> >
>> > Maybe you could add a boolean to flag if the last CMD was a
>> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
>> > true, please get the jiffies before and after each waitrdy and
>> > delay_ns. Finally, please print the expected delay and the actual one
>> > and compare to see if something was too fast compared to what we
>> > expected.  
>> 
>> Between which points exactly should the delay be measured?  Also, there
>> is no command called READCACHESTART.  Did you mean READSTART or
>> something else?
>
> Yeah, whatever command is specific to sequential cache reads:
> https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
> https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228

I'm still not sure what exactly you want to me measure.  The waitrdy and
ndelay combined, each separately, or something else?
Miquel Raynal July 19, 2023, 8:21 a.m. UTC | #16
Hi Måns,

mans@mansr.com wrote on Tue, 18 Jul 2023 15:03:14 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > Hi Måns,
> >
> > mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:
> >  
> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >>   
> >> > So, I should have done that earlier but, could you please slow the
> >> > whole operation down, just to see if there is something wrong with the
> >> > timings or if we should look in another direction.
> >> >
> >> > Maybe you could add a boolean to flag if the last CMD was a
> >> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
> >> > true, please get the jiffies before and after each waitrdy and
> >> > delay_ns. Finally, please print the expected delay and the actual one
> >> > and compare to see if something was too fast compared to what we
> >> > expected.    
> >> 
> >> Between which points exactly should the delay be measured?  Also, there
> >> is no command called READCACHESTART.  Did you mean READSTART or
> >> something else?  
> >
> > Yeah, whatever command is specific to sequential cache reads:
> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228  
> 
> I'm still not sure what exactly you want to me measure.  The waitrdy and
> ndelay combined, each separately, or something else?
> 

I would like to know how much time we spend waiting in both cases. Is
there something wrong with the "wait ready"? As we cannot observe the
timings with a scope, because we are using a "soft" controller
implementation somehow, we can easily measure how much time we
spend in each operation by measuring the time before and after.

This information is only useful when we are doing operations related
to sequential reads.

Thanks,
Miquèl
Måns Rullgård July 19, 2023, 9:26 a.m. UTC | #17
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> Hi Måns,
>
> mans@mansr.com wrote on Tue, 18 Jul 2023 15:03:14 +0100:
>
>> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> 
>> > Hi Måns,
>> >
>> > mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:
>> >  
>> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> >>   
>> >> > So, I should have done that earlier but, could you please slow the
>> >> > whole operation down, just to see if there is something wrong with the
>> >> > timings or if we should look in another direction.
>> >> >
>> >> > Maybe you could add a boolean to flag if the last CMD was a
>> >> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
>> >> > true, please get the jiffies before and after each waitrdy and
>> >> > delay_ns. Finally, please print the expected delay and the actual one
>> >> > and compare to see if something was too fast compared to what we
>> >> > expected.    
>> >> 
>> >> Between which points exactly should the delay be measured?  Also, there
>> >> is no command called READCACHESTART.  Did you mean READSTART or
>> >> something else?  
>> >
>> > Yeah, whatever command is specific to sequential cache reads:
>> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
>> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228  
>> 
>> I'm still not sure what exactly you want to me measure.  The waitrdy and
>> ndelay combined, each separately, or something else?
>> 
>
> I would like to know, how much time we spend waiting in both cases.

Which "both" cases?

> Is there something wrong with the "wait ready"? As we cannot observe
> the timings with a scope, because we are using a "soft" controller
> implementation somehow, we can easily measure how much time we spend
> in each operation by measuring the time before and after.
>
> These information are only useful when we are doing operations related
> to sequential reads.

I have hooked up some spare GPIOs to a scope, which should be more
accurate (nanosecond) than software timestamps.  All I need to know is
what to measure and what to look for in those measurements.
Miquel Raynal July 19, 2023, 11:44 a.m. UTC | #18
Hi Måns,

mans@mansr.com wrote on Wed, 19 Jul 2023 10:26:09 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > Hi Måns,
> >
> > mans@mansr.com wrote on Tue, 18 Jul 2023 15:03:14 +0100:
> >  
> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >>   
> >> > Hi Måns,
> >> >
> >> > mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:
> >> >    
> >> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >> >>     
> >> >> > So, I should have done that earlier but, could you please slow the
> >> >> > whole operation down, just to see if there is something wrong with the
> >> >> > timings or if we should look in another direction.
> >> >> >
> >> >> > Maybe you could add a boolean to flag if the last CMD was a
> >> >> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
> >> >> > true, please get the jiffies before and after each waitrdy and
> >> >> > delay_ns. Finally, please print the expected delay and the actual one
> >> >> > and compare to see if something was too fast compared to what we
> >> >> > expected.      
> >> >> 
> >> >> Between which points exactly should the delay be measured?  Also, there
> >> >> is no command called READCACHESTART.  Did you mean READSTART or
> >> >> something else?    
> >> >
> >> > Yeah, whatever command is specific to sequential cache reads:
> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228    
> >> 
> >> I'm still not sure what exactly you want to me measure.  The waitrdy and
> >> ndelay combined, each separately, or something else?
> >>   
> >
> > I would like to know, how much time we spend waiting in both cases.  
> 
> Which "both" cases?

ndelay and more importantly, waitrdy:

--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2111,6 +2111,7 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
 
        switch (instr->type) {
        case NAND_OP_CMD_INSTR:
+               // trace the opcode
                iowrite8(instr->ctx.cmd.opcode,
                         info->reg.gpmc_nand_command);
                break;
@@ -2135,16 +2136,21 @@ static int omap_nand_exec_instr(struct nand_chip *chip,
                break;
 
        case NAND_OP_WAITRDY_INSTR:
+               // start waitrdy
                ret = info->ready_gpiod ?
                        nand_gpio_waitrdy(chip, info->ready_gpiod, instr->ctx.waitrdy.timeout_ms) :
                        nand_soft_waitrdy(chip, instr->ctx.waitrdy.timeout_ms);
+               // end
                if (ret)
                        return ret;
                break;
        }
 
-       if (instr->delay_ns)
+       if (instr->delay_ns) {
+               // start delay
                ndelay(instr->delay_ns);
+               // end
+       }
 
        return 0;
 }

> > Is there something wrong with the "wait ready"? As we cannot observe
> > the timings with a scope, because we are using a "soft" controller
> > implementation somehow, we can easily measure how much time we spend
> > in each operation by measuring the time before and after.
> >
> > These information are only useful when we are doing operations related
> > to sequential reads.  
> 
> I have hooked up some spare GPIOs to a scope, which should be more
> accurate (nanosecond) than software timestamps.  All I need to know is
> what to measure and what to look for in those measurements.

Great. The only issue with the scope is the fact that we might actually
look at something that is not a faulty sequential read op. Unless you
hack into the core to perform these in a loop (with a brutal "while
(1)"). But I don't think we require big precision here, at least as a
first step, looking at software timestamps like hinted above is enough
so we can easily identify the different delays and compare them with
nand_timings.c.

Please use whatever method is easier for you.

Thanks,
Miquèl
Måns Rullgård July 19, 2023, 1:15 p.m. UTC | #19
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> Hi Måns,
>
> mans@mansr.com wrote on Wed, 19 Jul 2023 10:26:09 +0100:
>
>> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> 
>> > Hi Måns,
>> >
>> > mans@mansr.com wrote on Tue, 18 Jul 2023 15:03:14 +0100:
>> >  
>> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> >>   
>> >> > Hi Måns,
>> >> >
>> >> > mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:
>> >> >    
>> >> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> >> >>     
>> >> >> > So, I should have done that earlier but, could you please slow the
>> >> >> > whole operation down, just to see if there is something wrong with the
>> >> >> > timings or if we should look in another direction.
>> >> >> >
>> >> >> > Maybe you could add a boolean to flag if the last CMD was a
>> >> >> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
>> >> >> > true, please get the jiffies before and after each waitrdy and
>> >> >> > delay_ns. Finally, please print the expected delay and the actual one
>> >> >> > and compare to see if something was too fast compared to what we
>> >> >> > expected.      
>> >> >> 
>> >> >> Between which points exactly should the delay be measured?  Also, there
>> >> >> is no command called READCACHESTART.  Did you mean READSTART or
>> >> >> something else?    
>> >> >
>> >> > Yeah, whatever command is specific to sequential cache reads:
>> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
>> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228    
>> >> 
>> >> I'm still not sure what exactly you want to me measure.  The waitrdy and
>> >> ndelay combined, each separately, or something else?
>> >>   
>> >
>> > I would like to know, how much time we spend waiting in both cases.  
>> 
>> Which "both" cases?
>
> ndelay and more importantly, waitrdy:

[...]

>> > Is there something wrong with the "wait ready"? As we cannot observe
>> > the timings with a scope, because we are using a "soft" controller
>> > implementation somehow, we can easily measure how much time we spend
>> > in each operation by measuring the time before and after.
>> >
>> > These information are only useful when we are doing operations related
>> > to sequential reads.  
>> 
>> I have hooked up some spare GPIOs to a scope, which should be more
>> accurate (nanosecond) than software timestamps.  All I need to know is
>> what to measure and what to look for in those measurements.
>
> Great. The only issue with the scope is the fact that we might actually
> look at something that is not a faulty sequential read op.

How exactly do I know which ones are faulty?

> Unless you hack into the core to perform these in a loop (with a
> brutal "while (1)"). But I don't think we require big precision here,
> at least as a first step, looking at software timestamps like hinted
> above is enough so we can easily identify the different delays and
> compare them with nand_timings.c.
>
> Please use whatever method is easier for you.

Which values should be compared?

Sorry if I'm being obtuse, I don't normally have to deal with these
things.
Miquel Raynal July 20, 2023, 6:20 a.m. UTC | #20
Hi Måns,

mans@mansr.com wrote on Wed, 19 Jul 2023 14:15:48 +0100:

> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> 
> > Hi Måns,
> >
> > mans@mansr.com wrote on Wed, 19 Jul 2023 10:26:09 +0100:
> >  
> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >>   
> >> > Hi Måns,
> >> >
> >> > mans@mansr.com wrote on Tue, 18 Jul 2023 15:03:14 +0100:
> >> >    
> >> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >> >>     
> >> >> > Hi Måns,
> >> >> >
> >> >> > mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:
> >> >> >      
> >> >> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
> >> >> >>       
> >> >> >> > So, I should have done that earlier but, could you please slow the
> >> >> >> > whole operation down, just to see if there is something wrong with the
> >> >> >> > timings or if we should look in another direction.
> >> >> >> >
> >> >> >> > Maybe you could add a boolean to flag if the last CMD was a
> >> >> >> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
> >> >> >> > true, please get the jiffies before and after each waitrdy and
> >> >> >> > delay_ns. Finally, please print the expected delay and the actual one
> >> >> >> > and compare to see if something was too fast compared to what we
> >> >> >> > expected.        
> >> >> >> 
> >> >> >> Between which points exactly should the delay be measured?  Also, there
> >> >> >> is no command called READCACHESTART.  Did you mean READSTART or
> >> >> >> something else?      
> >> >> >
> >> >> > Yeah, whatever command is specific to sequential cache reads:
> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228      
> >> >> 
> >> >> I'm still not sure what exactly you want to me measure.  The waitrdy and
> >> >> ndelay combined, each separately, or something else?
> >> >>     
> >> >
> >> > I would like to know, how much time we spend waiting in both cases.    
> >> 
> >> Which "both" cases?  
> >
> > ndelay and more importantly, waitrdy:  
> 
> [...]
> 
> >> > Is there something wrong with the "wait ready"? As we cannot observe
> >> > the timings with a scope, because we are using a "soft" controller
> >> > implementation somehow, we can easily measure how much time we spend
> >> > in each operation by measuring the time before and after.
> >> >
> >> > These information are only useful when we are doing operations related
> >> > to sequential reads.    
> >> 
> >> I have hooked up some spare GPIOs to a scope, which should be more
> >> accurate (nanosecond) than software timestamps.  All I need to know is
> >> what to measure and what to look for in those measurements.  
> >
> > Great. The only issue with the scope is the fact that we might actually
> > look at something that is not a faulty sequential read op.  
> 
> How exactly do I know which ones are faulty?

Right now I expect all sequential ops to be faulty. As mentioned above,
I don't think we are interested in all the commands that are sent
through the NAND bus, but just the READSTART/READCACHESEQ/READCACHEEND
sequences, see these two ops there, that's what we want to capture:

> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228   

That's why a regular scope is not as easy to use as it sounds for
capturing these timings.

> > Unless you hack into the core to perform these in a loop (with a
> > brutal "while (1)"). But I don't think we require big precision here,
> > at least as a first step, looking at software timestamps like hinted
> > above is enough so we can easily identify the different delays and
> > compare them with nand_timings.c.
> >
> > Please use whatever method is easier for you.  
> 
> Which values should be compared?

The specification declares minimum and maximum times (see
nand_timings.c). I want to see if these timings, which are requested by
the core (links above) are correctly observed or not. The ones that are
particularly critical because they are different than what the other
ops use, are the ones around READSTART/READCACHESEQ/READCACHEEND.
Anything else, you already use them, so it's quite likely that they are
not problematic. These are new.

> Sorry if I'm being obtuse, I don't normally have to deal with these
> things.

No problem, thanks for trying to help.

Thanks,
Miquèl
Måns Rullgård July 20, 2023, 11:42 a.m. UTC | #21
Miquel Raynal <miquel.raynal@bootlin.com> writes:

> Hi Måns,
>
> mans@mansr.com wrote on Wed, 19 Jul 2023 14:15:48 +0100:
>
>> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> 
>> > Hi Måns,
>> >
>> > mans@mansr.com wrote on Wed, 19 Jul 2023 10:26:09 +0100:
>> >  
>> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> >>   
>> >> > Hi Måns,
>> >> >
>> >> > mans@mansr.com wrote on Tue, 18 Jul 2023 15:03:14 +0100:
>> >> >    
>> >> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> >> >>     
>> >> >> > Hi Måns,
>> >> >> >
>> >> >> > mans@mansr.com wrote on Mon, 17 Jul 2023 14:11:31 +0100:
>> >> >> >      
>> >> >> >> Miquel Raynal <miquel.raynal@bootlin.com> writes:
>> >> >> >>       
>> >> >> >> > So, I should have done that earlier but, could you please slow the
>> >> >> >> > whole operation down, just to see if there is something wrong with the
>> >> >> >> > timings or if we should look in another direction.
>> >> >> >> >
>> >> >> >> > Maybe you could add a boolean to flag if the last CMD was a
>> >> >> >> > READCACHESEQ, READCACHESTART or READCACHEEND, and if the flag is
>> >> >> >> > true, please get the jiffies before and after each waitrdy and
>> >> >> >> > delay_ns. Finally, please print the expected delay and the actual one
>> >> >> >> > and compare to see if something was too fast compared to what we
>> >> >> >> > expected.        
>> >> >> >> 
>> >> >> >> Between which points exactly should the delay be measured?  Also, there
>> >> >> >> is no command called READCACHESTART.  Did you mean READSTART or
>> >> >> >> something else?      
>> >> >> >
>> >> >> > Yeah, whatever command is specific to sequential cache reads:
>> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
>> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228      
>> >> >> 
>> >> >> I'm still not sure what exactly you want to me measure.  The waitrdy and
>> >> >> ndelay combined, each separately, or something else?
>> >> >>     
>> >> >
>> >> > I would like to know, how much time we spend waiting in both cases.    
>> >> 
>> >> Which "both" cases?  
>> >
>> > ndelay and more importantly, waitrdy:  
>> 
>> [...]
>> 
>> >> > Is there something wrong with the "wait ready"? As we cannot observe
>> >> > the timings with a scope, because we are using a "soft" controller
>> >> > implementation somehow, we can easily measure how much time we spend
>> >> > in each operation by measuring the time before and after.
>> >> >
>> >> > These information are only useful when we are doing operations related
>> >> > to sequential reads.    
>> >> 
>> >> I have hooked up some spare GPIOs to a scope, which should be more
>> >> accurate (nanosecond) than software timestamps.  All I need to know is
>> >> what to measure and what to look for in those measurements.  
>> >
>> > Great. The only issue with the scope is the fact that we might actually
>> > look at something that is not a faulty sequential read op.  
>> 
>> How exactly do I know which ones are faulty?
>
> Right now I expect all sequential ops to be faulty. As mentioned above,
> I don't think we are interested in all the commands that are sent
> through the NAND bus, but just the READSTART/READCACHESEQ/READCACHEEND
> sequences, see these two ops there, that's what we want to capture:
>
>> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1218
>> >> >> > https://elixir.bootlin.com/linux/latest/source/drivers/mtd/nand/raw/nand_base.c#L1228   
>
> That's why a regular scope is not as easy as it sounds to use to
> capture these timings.

I have it set up so it raises one of three GPIOs at the start of
omap_nand_exec_instr() when any of those commands are issued, then a
fourth during the following waitrdy.  After the ndelay(), the pin for
the command is lowered again.  This makes it easy to measure the
duration of the waitrdy as well as any additional delay associated with
each of the commands.

The actual nand chip signals are unfortunately impossible to access.

>> > Unless you hack into the core to perform these in a loop (with a
>> > brutal "while (1)"). But I don't think we require big precision here,
>> > at least as a first step, looking at software timestamps like hinted
>> > above is enough so we can easily identify the different delays and
>> > compare them with nand_timings.c.
>> >
>> > Please use whatever method is easier for you.  
>> 
>> Which values should be compared?
>
> The specification declares minimum and maximum times (see
> nand_timings.c). I want to see if these timings, which are requested by
> the core (links above) are correctly observed or not. The ones that are
> particularly critical because they are different than what the other
> ops use, are the ones around READSTART/READCACHESEQ/READCACHEEND.
> Anything else, you already use them, so it's quite likely that they are
> not problematic. These are new.

I don't think it's quite as simple as these commands being somehow
broken.  The system works for the most part, and these commands are
definitely being used.  The only breakage I notice is that the
MEMGETBADBLOCK ioctl wrongly reports blocks as being bad under some
unclear conditions.  There appears to be some weird interaction between
this ioctl and read() calls.  Whatever the pattern is, it is entirely
predictable in that issuing the same sequence of ioctl and read always
gives the same error pattern.  Using pread instead of read changes the
pattern.
Martin Hundebøll Sept. 8, 2023, 12:25 p.m. UTC | #22
Hi Miquel et al.

On Thu, 2023-01-12 at 10:36 +0100, Miquel Raynal wrote:
> From: JaimeLiao <jaimeliao.tw@gmail.com>
> 
> Add support for sequential cache reads for controllers using the
> generic
> core helpers for their fast read/write helpers.
> 
> Sequential reads may reduce the overhead when accessing physically
> continuous data by loading in cache the next page while the previous
> page gets sent out on the NAND bus.
> 
> The ONFI specification provides the following additional commands to
> handle sequential cached reads:
> 
> * 0x31 - READ CACHE SEQUENTIAL:
>   Requires the NAND chip to load the next page into cache while
> keeping
>   the current cache available for host reads.
> * 0x3F - READ CACHE END:
>   Tells the NAND chip this is the end of the sequential cache read,
> the
>   current cache shall remain accessible for the host but no more
>   internal cache loading operation is required.
> 
> On the bus, a multi page read operation is currently handled like
> this:
> 
>         00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
>         00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
>         00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> 
> Sequential cached reads may instead be achieved with:
> 
>         00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
>                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
>                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
>                        3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> 
> Below are the read speed test results with regular reads and
> sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode
> with
> a NAND chip characterized with the following timings:
> * tR: 20 µs
> * tRCBSY: 5 µs
> * tRR: 20 ns
> and the following geometry:
> * device size: 2 MiB
> * eraseblock size: 128 kiB
> * page size: 2 kiB
> 
> ============= Normal read @ 33MHz =================
> mtd_speedtest: eraseblock read speed is 15633 KiB/s
> mtd_speedtest: page read speed is 15515 KiB/s
> mtd_speedtest: 2 page read speed is 15398 KiB/s
> ===================================================
> 
> ========= Sequential cache read @ 33MHz ===========
> mtd_speedtest: eraseblock read speed is 18285 KiB/s
> mtd_speedtest: page read speed is 15875 KiB/s
> mtd_speedtest: 2 page read speed is 16253 KiB/s
> ===================================================
> 
> We observe an overall speed improvement of about 5% when reading
> 2 pages, up to 15% when reading an entire block. This is due to the
> ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> 
> Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>

This patch broke our imx6ull system after doing a kernel upgrade:
[    2.921886] ubi0: default fastmap pool size: 100
[    2.926612] ubi0: default fastmap WL pool size: 50
[    2.931421] ubi0: attaching mtd1
[    3.515799] ubi0: scanning is finished
[    3.525237] ubi0 error: vtbl_check: bad CRC at record 0: 0x88cdfb6,
not 0xffffffff
[    3.532937] Volume table record 0 dump:
[    3.536783]  reserved_pebs   -1
[    3.539932]  alignment       -1
[    3.543101]  data_pad        -1
[    3.546251]  vol_type        255
[    3.549485]  upd_marker      255
[    3.552746]  name_len        65535
[    3.556155]  1st 5 characters of name: 
[    3.560429]  crc             0xffffffff
[    3.564294] ubi0 error: vtbl_check: bad CRC at record 0: 0x88cdfb6,
not 0xffffffff
[    3.571892] Volume table record 0 dump:
[    3.575754]  reserved_pebs   -1
[    3.578906]  alignment       -1
[    3.582049]  data_pad        -1
[    3.585216]  vol_type        255
[    3.588452]  upd_marker      255
[    3.591687]  name_len        65535
[    3.595108]  1st 5 characters of name: 
[    3.599384]  crc             0xffffffff
[    3.603285] ubi0 error: ubi_read_volume_table: both volume tables
are corrupted
[    3.611460] ubi0 error: ubi_attach_mtd_dev: failed to attach mtd1,
error -22
[    3.618760] UBI error: cannot attach mtd1
[    3.622831] UBI: block: can't open volume on ubi0_4, err=-19
[    3.628505] UBI: block: can't open volume on ubi0_6, err=-19
[    3.634196] UBI: block: can't open volume on ubi0_7, err=-19

It fails consistently at every attach operation. As mentioned above,
this is on an i.MX6ULL and a Toshiba NAND chip:
[    0.530121] nand: device found, Manufacturer ID: 0x98, Chip ID: 0xdc
[    0.530173] nand: Toshiba NAND 512MiB 3,3V 8-bit
[    0.530194] nand: 512 MiB, SLC, erase size: 256 KiB, page size:
4096, OOB size: 128

I'm happy to perform experiments to fix this.

// Martin

> ---
>  drivers/mtd/nand/raw/nand_base.c | 119
> +++++++++++++++++++++++++++++--
>  include/linux/mtd/rawnand.h      |   9 +++
>  2 files changed, 124 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/mtd/nand/raw/nand_base.c
> b/drivers/mtd/nand/raw/nand_base.c
> index 34395d5d3a47..0b1fd6bbb36b 100644
> --- a/drivers/mtd/nand/raw/nand_base.c
> +++ b/drivers/mtd/nand/raw/nand_base.c
> @@ -1208,6 +1208,73 @@ static int nand_lp_exec_read_page_op(struct
> nand_chip *chip, unsigned int page,
>         return nand_exec_op(chip, &op);
>  }
>  
> +static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip,
> unsigned int page,
> +                                         unsigned int
> offset_in_page, void *buf,
> +                                         unsigned int len, bool
> check_only)
> +{
> +       const struct nand_interface_config *conf =
> +               nand_get_interface_config(chip);
> +       u8 addrs[5];
> +       struct nand_op_instr start_instrs[] = {
> +               NAND_OP_CMD(NAND_CMD_READ0, 0),
> +               NAND_OP_ADDR(4, addrs, 0),
> +               NAND_OP_CMD(NAND_CMD_READSTART,
> NAND_COMMON_TIMING_NS(conf, tWB_max)),
> +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> 0),
> +               NAND_OP_CMD(NAND_CMD_READCACHESEQ,
> NAND_COMMON_TIMING_NS(conf, tWB_max)),
> +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> +                                NAND_COMMON_TIMING_NS(conf,
> tRR_min)),
> +               NAND_OP_DATA_IN(len, buf, 0),
> +       };
> +       struct nand_op_instr cont_instrs[] = {
> +               NAND_OP_CMD(page == chip->cont_read.last_page ?
> +                           NAND_CMD_READCACHEEND :
> NAND_CMD_READCACHESEQ,
> +                           NAND_COMMON_TIMING_NS(conf, tWB_max)),
> +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> +                                NAND_COMMON_TIMING_NS(conf,
> tRR_min)),
> +               NAND_OP_DATA_IN(len, buf, 0),
> +       };
> +       struct nand_operation start_op = NAND_OPERATION(chip->cur_cs,
> start_instrs);
> +       struct nand_operation cont_op = NAND_OPERATION(chip->cur_cs,
> cont_instrs);
> +       int ret;
> +
> +       if (!len) {
> +               start_op.ninstrs--;
> +               cont_op.ninstrs--;
> +       }
> +
> +       ret = nand_fill_column_cycles(chip, addrs, offset_in_page);
> +       if (ret < 0)
> +               return ret;
> +
> +       addrs[2] = page;
> +       addrs[3] = page >> 8;
> +
> +       if (chip->options & NAND_ROW_ADDR_3) {
> +               addrs[4] = page >> 16;
> +               start_instrs[1].ctx.addr.naddrs++;
> +       }
> +
> +       /* Check if cache reads are supported */
> +       if (check_only) {
> +               if (nand_check_op(chip, &start_op) ||
> nand_check_op(chip, &cont_op))
> +                       return -EOPNOTSUPP;
> +
> +               return 0;
> +       }
> +
> +       if (page == chip->cont_read.first_page)
> +               return nand_exec_op(chip, &start_op);
> +       else
> +               return nand_exec_op(chip, &cont_op);
> +}
> +
> +static bool rawnand_cont_read_ongoing(struct nand_chip *chip,
> unsigned int page)
> +{
> +       return chip->cont_read.ongoing &&
> +               page >= chip->cont_read.first_page &&
> +               page <= chip->cont_read.last_page;
> +}
> +
>  /**
>   * nand_read_page_op - Do a READ PAGE operation
>   * @chip: The NAND chip
> @@ -1233,10 +1300,16 @@ int nand_read_page_op(struct nand_chip *chip,
> unsigned int page,
>                 return -EINVAL;
>  
>         if (nand_has_exec_op(chip)) {
> -               if (mtd->writesize > 512)
> -                       return nand_lp_exec_read_page_op(chip, page,
> -                                                       
> offset_in_page, buf,
> -                                                        len);
> +               if (mtd->writesize > 512) {
> +                       if (rawnand_cont_read_ongoing(chip, page))
> +                               return
> nand_lp_exec_cont_read_page_op(chip, page,
> +                                                                    
> offset_in_page,
> +                                                                    
> buf, len, false);
> +                       else
> +                               return
> nand_lp_exec_read_page_op(chip, page,
> +                                                               
> offset_in_page, buf,
> +                                                               
> len);
> +               }
>  
>                 return nand_sp_exec_read_page_op(chip, page,
> offset_in_page,
>                                                  buf, len);
> @@ -3353,6 +3426,27 @@ static uint8_t *nand_transfer_oob(struct
> nand_chip *chip, uint8_t *oob,
>         return NULL;
>  }
>  
> +static void rawnand_enable_cont_reads(struct nand_chip *chip,
> unsigned int page,
> +                                     u32 readlen, int col)
> +{
> +       struct mtd_info *mtd = nand_to_mtd(chip);
> +
> +       if (!chip->controller->supported_op.cont_read)
> +               return;
> +
> +       if ((col && col + readlen < (3 * mtd->writesize)) ||
> +           (!col && readlen < (2 * mtd->writesize))) {
> +               chip->cont_read.ongoing = false;
> +               return;
> +       }
> +
> +       chip->cont_read.ongoing = true;
> +       chip->cont_read.first_page = page;
> +       if (col)
> +               chip->cont_read.first_page++;
> +       chip->cont_read.last_page = page + ((readlen >> chip-
> >page_shift) & chip->pagemask);
> +}
> +
>  /**
>   * nand_setup_read_retry - [INTERN] Set the READ RETRY mode
>   * @chip: NAND chip object
> @@ -3426,6 +3520,8 @@ static int nand_do_read_ops(struct nand_chip
> *chip, loff_t from,
>         oob = ops->oobbuf;
>         oob_required = oob ? 1 : 0;
>  
> +       rawnand_enable_cont_reads(chip, page, readlen, col);
> +
>         while (1) {
>                 struct mtd_ecc_stats ecc_stats = mtd->ecc_stats;
>  
> @@ -5009,12 +5105,27 @@ static void
> rawnand_early_check_supported_ops(struct nand_chip *chip)
>         rawnand_check_data_only_read_support(chip);
>  }
>  
> +static void rawnand_check_cont_read_support(struct nand_chip *chip)
> +{
> +       struct mtd_info *mtd = nand_to_mtd(chip);
> +
> +       if (chip->read_retries)
> +               return;
> +
> +       if (!nand_lp_exec_cont_read_page_op(chip, 0, 0, NULL,
> +                                           mtd->writesize, true))
> +               chip->controller->supported_op.cont_read = 1;
> +}
> +
>  static void rawnand_late_check_supported_ops(struct nand_chip *chip)
>  {
>         /* The supported_op fields should not be set by individual
> drivers */
> +       WARN_ON_ONCE(chip->controller->supported_op.cont_read);
>  
>         if (!nand_has_exec_op(chip))
>                 return;
> +
> +       rawnand_check_cont_read_support(chip);
>  }
>  
>  /*
> diff --git a/include/linux/mtd/rawnand.h
> b/include/linux/mtd/rawnand.h
> index 28c5dce782dd..1b0936fe3c6e 100644
> --- a/include/linux/mtd/rawnand.h
> +++ b/include/linux/mtd/rawnand.h
> @@ -67,6 +67,8 @@ struct gpio_desc;
>  
>  /* Extended commands for large page devices */
>  #define NAND_CMD_READSTART     0x30
> +#define NAND_CMD_READCACHESEQ  0x31
> +#define NAND_CMD_READCACHEEND  0x3f
>  #define NAND_CMD_RNDOUTSTART   0xE0
>  #define NAND_CMD_CACHEDPROG    0x15
>  
> @@ -1099,12 +1101,14 @@ struct nand_controller_ops {
>   * @supported_op.data_only_read: The controller supports reading
> more data from
>   *                     the bus without restarting an entire read
> operation nor
>   *                     changing the column.
> + * @supported_op.cont_read: The controller supports sequential cache
> reads.
>   */
>  struct nand_controller {
>         struct mutex lock;
>         const struct nand_controller_ops *ops;
>         struct {
>                 unsigned int data_only_read: 1;
> +               unsigned int cont_read: 1;
>         } supported_op;
>  };
>  
> @@ -1308,6 +1312,11 @@ struct nand_chip {
>         int read_retries;
>         struct nand_secure_region *secure_regions;
>         u8 nr_secure_regions;
> +       struct {
> +               bool ongoing;
> +               unsigned int first_page;
> +               unsigned int last_page;
> +       } cont_read;
>  
>         /* Externals */
>         struct nand_controller *controller;
Miquel Raynal Sept. 12, 2023, 3:59 p.m. UTC | #23
Hi Martin,

martin@geanix.com wrote on Fri, 08 Sep 2023 14:25:59 +0200:

> Hi Miquel et al.
> 
> On Thu, 2023-01-12 at 10:36 +0100, Miquel Raynal wrote:
> > From: JaimeLiao <jaimeliao.tw@gmail.com>
> > 
> > Add support for sequential cache reads for controllers using the
> > generic
> > core helpers for their fast read/write helpers.
> > 
> > Sequential reads may reduce the overhead when accessing physically
> > contiguous data by loading in cache the next page while the previous
> > page gets sent out on the NAND bus.
> > 
> > The ONFI specification provides the following additional commands to
> > handle sequential cached reads:
> > 
> > * 0x31 - READ CACHE SEQUENTIAL:
> >   Requires the NAND chip to load the next page into cache while
> > keeping
> >   the current cache available for host reads.
> > * 0x3F - READ CACHE END:
> >   Tells the NAND chip this is the end of the sequential cache read,
> > the
> >   current cache shall remain accessible for the host but no more
> >   internal cache loading operation is required.
> > 
> > On the bus, a multi page read operation is currently handled like
> > this:
> > 
> >         00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> >         00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> >         00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> > 
> > Sequential cached reads may instead be achieved with:
> > 
> >         00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> >                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> >                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> >                        3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> > 
> > Below are the read speed test results with regular reads and
> > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode
> > with
> > a NAND chip characterized with the following timings:
> > * tR: 20 µs
> > * tRCBSY: 5 µs
> > * tRR: 20 ns
> > and the following geometry:
> > * device size: 2 MiB
> > * eraseblock size: 128 kiB
> > * page size: 2 kiB
> > 
> > ============= Normal read @ 33MHz =================
> > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> > mtd_speedtest: page read speed is 15515 KiB/s
> > mtd_speedtest: 2 page read speed is 15398 KiB/s
> > ===================================================
> > 
> > ========= Sequential cache read @ 33MHz ===========
> > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> > mtd_speedtest: page read speed is 15875 KiB/s
> > mtd_speedtest: 2 page read speed is 16253 KiB/s
> > ===================================================
> > 
> > We observe an overall speed improvement of about 5% when reading
> > 2 pages, up to 15% when reading an entire block. This is due to the
> > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> > 
> > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>  
> 
> This patch broke our imx6ull system after doing a kernel upgrade:
> [    2.921886] ubi0: default fastmap pool size: 100
> [    2.926612] ubi0: default fastmap WL pool size: 50
> [    2.931421] ubi0: attaching mtd1
> [    3.515799] ubi0: scanning is finished
> [    3.525237] ubi0 error: vtbl_check: bad CRC at record 0: 0x88cdfb6,
> not 0xffffffff
> [    3.532937] Volume table record 0 dump:
> [    3.536783]  reserved_pebs   -1
> [    3.539932]  alignment       -1
> [    3.543101]  data_pad        -1
> [    3.546251]  vol_type        255
> [    3.549485]  upd_marker      255
> [    3.552746]  name_len        65535
> [    3.556155]  1st 5 characters of name: 
> [    3.560429]  crc             0xffffffff
> [    3.564294] ubi0 error: vtbl_check: bad CRC at record 0: 0x88cdfb6,
> not 0xffffffff
> [    3.571892] Volume table record 0 dump:
> [    3.575754]  reserved_pebs   -1
> [    3.578906]  alignment       -1
> [    3.582049]  data_pad        -1
> [    3.585216]  vol_type        255
> [    3.588452]  upd_marker      255
> [    3.591687]  name_len        65535
> [    3.595108]  1st 5 characters of name: 
> [    3.599384]  crc             0xffffffff
> [    3.603285] ubi0 error: ubi_read_volume_table: both volume tables
> are corrupted
> [    3.611460] ubi0 error: ubi_attach_mtd_dev: failed to attach mtd1,
> error -22
> [    3.618760] UBI error: cannot attach mtd1
> [    3.622831] UBI: block: can't open volume on ubi0_4, err=-19
> [    3.628505] UBI: block: can't open volume on ubi0_6, err=-19
> [    3.634196] UBI: block: can't open volume on ubi0_7, err=-19
> 
> It fails consistently at every attach operation. As mentioned above,
> this is on an i.MX6ULL and a Toshiba NAND chip:
> [    0.530121] nand: device found, Manufacturer ID: 0x98, Chip ID: 0xdc
> [    0.530173] nand: Toshiba NAND 512MiB 3,3V 8-bit
> [    0.530194] nand: 512 MiB, SLC, erase size: 256 KiB, page size:
> 4096, OOB size: 128
> 
> I'm happy to perform experiments to fix this.

I received two other reports using another controller and two different
chips, we investigated the timings which could be the issue but found
nothing relevant. I thought it was specific to the controller (or its
driver) but if you reproduce on imx6 it must be something else.
Especially since this series was also tested on imx6. So maybe some
devices do not really support what they advertise? Or they expect
another sequence? I need to investigate this further but I am a bit
clueless.

Still thinking...

Thanks, Miquèl

> 
> // Martin
> 
> > ---
> >  drivers/mtd/nand/raw/nand_base.c | 119
> > +++++++++++++++++++++++++++++--
> >  include/linux/mtd/rawnand.h      |   9 +++
> >  2 files changed, 124 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/mtd/nand/raw/nand_base.c
> > b/drivers/mtd/nand/raw/nand_base.c
> > index 34395d5d3a47..0b1fd6bbb36b 100644
> > --- a/drivers/mtd/nand/raw/nand_base.c
> > +++ b/drivers/mtd/nand/raw/nand_base.c
> > @@ -1208,6 +1208,73 @@ static int nand_lp_exec_read_page_op(struct
> > nand_chip *chip, unsigned int page,
> >         return nand_exec_op(chip, &op);
> >  }
> >  
> > +static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip,
> > unsigned int page,
> > +                                         unsigned int
> > offset_in_page, void *buf,
> > +                                         unsigned int len, bool
> > check_only)
> > +{
> > +       const struct nand_interface_config *conf =
> > +               nand_get_interface_config(chip);
> > +       u8 addrs[5];
> > +       struct nand_op_instr start_instrs[] = {
> > +               NAND_OP_CMD(NAND_CMD_READ0, 0),
> > +               NAND_OP_ADDR(4, addrs, 0),
> > +               NAND_OP_CMD(NAND_CMD_READSTART,
> > NAND_COMMON_TIMING_NS(conf, tWB_max)),
> > +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> > 0),
> > +               NAND_OP_CMD(NAND_CMD_READCACHESEQ,
> > NAND_COMMON_TIMING_NS(conf, tWB_max)),
> > +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> > +                                NAND_COMMON_TIMING_NS(conf,
> > tRR_min)),
> > +               NAND_OP_DATA_IN(len, buf, 0),
> > +       };
> > +       struct nand_op_instr cont_instrs[] = {
> > +               NAND_OP_CMD(page == chip->cont_read.last_page ?
> > +                           NAND_CMD_READCACHEEND :
> > NAND_CMD_READCACHESEQ,
> > +                           NAND_COMMON_TIMING_NS(conf, tWB_max)),
> > +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
> > +                                NAND_COMMON_TIMING_NS(conf,
> > tRR_min)),
> > +               NAND_OP_DATA_IN(len, buf, 0),
> > +       };
> > +       struct nand_operation start_op = NAND_OPERATION(chip->cur_cs,
> > start_instrs);
> > +       struct nand_operation cont_op = NAND_OPERATION(chip->cur_cs,
> > cont_instrs);
> > +       int ret;
> > +
> > +       if (!len) {
> > +               start_op.ninstrs--;
> > +               cont_op.ninstrs--;
> > +       }
> > +
> > +       ret = nand_fill_column_cycles(chip, addrs, offset_in_page);
> > +       if (ret < 0)
> > +               return ret;
> > +
> > +       addrs[2] = page;
> > +       addrs[3] = page >> 8;
> > +
> > +       if (chip->options & NAND_ROW_ADDR_3) {
> > +               addrs[4] = page >> 16;
> > +               start_instrs[1].ctx.addr.naddrs++;
> > +       }
> > +
> > +       /* Check if cache reads are supported */
> > +       if (check_only) {
> > +               if (nand_check_op(chip, &start_op) ||
> > nand_check_op(chip, &cont_op))
> > +                       return -EOPNOTSUPP;
> > +
> > +               return 0;
> > +       }
> > +
> > +       if (page == chip->cont_read.first_page)
> > +               return nand_exec_op(chip, &start_op);
> > +       else
> > +               return nand_exec_op(chip, &cont_op);
> > +}
> > +
> > +static bool rawnand_cont_read_ongoing(struct nand_chip *chip,
> > unsigned int page)
> > +{
> > +       return chip->cont_read.ongoing &&
> > +               page >= chip->cont_read.first_page &&
> > +               page <= chip->cont_read.last_page;
> > +}
> > +
> >  /**
> >   * nand_read_page_op - Do a READ PAGE operation
> >   * @chip: The NAND chip
> > @@ -1233,10 +1300,16 @@ int nand_read_page_op(struct nand_chip *chip,
> > unsigned int page,
> >                 return -EINVAL;
> >  
> >         if (nand_has_exec_op(chip)) {
> > -               if (mtd->writesize > 512)
> > -                       return nand_lp_exec_read_page_op(chip, page,
> > -                                                       
> > offset_in_page, buf,
> > -                                                        len);
> > +               if (mtd->writesize > 512) {
> > +                       if (rawnand_cont_read_ongoing(chip, page))
> > +                               return
> > nand_lp_exec_cont_read_page_op(chip, page,
> > +                                                                    
> > offset_in_page,
> > +                                                                    
> > buf, len, false);
> > +                       else
> > +                               return
> > nand_lp_exec_read_page_op(chip, page,
> > +                                                               
> > offset_in_page, buf,
> > +                                                               
> > len);
> > +               }
> >  
> >                 return nand_sp_exec_read_page_op(chip, page,
> > offset_in_page,
> >                                                  buf, len);
> > @@ -3353,6 +3426,27 @@ static uint8_t *nand_transfer_oob(struct
> > nand_chip *chip, uint8_t *oob,
> >         return NULL;
> >  }
> >  
> > +static void rawnand_enable_cont_reads(struct nand_chip *chip,
> > unsigned int page,
> > +                                     u32 readlen, int col)
> > +{
> > +       struct mtd_info *mtd = nand_to_mtd(chip);
> > +
> > +       if (!chip->controller->supported_op.cont_read)
> > +               return;
> > +
> > +       if ((col && col + readlen < (3 * mtd->writesize)) ||
> > +           (!col && readlen < (2 * mtd->writesize))) {
> > +               chip->cont_read.ongoing = false;
> > +               return;
> > +       }
> > +
> > +       chip->cont_read.ongoing = true;
> > +       chip->cont_read.first_page = page;
> > +       if (col)
> > +               chip->cont_read.first_page++;
> > +       chip->cont_read.last_page = page + ((readlen >> chip-  
> > >page_shift) & chip->pagemask);  
> > +}
> > +
> >  /**
> >   * nand_setup_read_retry - [INTERN] Set the READ RETRY mode
> >   * @chip: NAND chip object
> > @@ -3426,6 +3520,8 @@ static int nand_do_read_ops(struct nand_chip
> > *chip, loff_t from,
> >         oob = ops->oobbuf;
> >         oob_required = oob ? 1 : 0;
> >  
> > +       rawnand_enable_cont_reads(chip, page, readlen, col);
> > +
> >         while (1) {
> >                 struct mtd_ecc_stats ecc_stats = mtd->ecc_stats;
> >  
> > @@ -5009,12 +5105,27 @@ static void
> > rawnand_early_check_supported_ops(struct nand_chip *chip)
> >         rawnand_check_data_only_read_support(chip);
> >  }
> >  
> > +static void rawnand_check_cont_read_support(struct nand_chip *chip)
> > +{
> > +       struct mtd_info *mtd = nand_to_mtd(chip);
> > +
> > +       if (chip->read_retries)
> > +               return;
> > +
> > +       if (!nand_lp_exec_cont_read_page_op(chip, 0, 0, NULL,
> > +                                           mtd->writesize, true))
> > +               chip->controller->supported_op.cont_read = 1;
> > +}
> > +
> >  static void rawnand_late_check_supported_ops(struct nand_chip *chip)
> >  {
> >         /* The supported_op fields should not be set by individual
> > drivers */
> > +       WARN_ON_ONCE(chip->controller->supported_op.cont_read);
> >  
> >         if (!nand_has_exec_op(chip))
> >                 return;
> > +
> > +       rawnand_check_cont_read_support(chip);
> >  }
> >  
> >  /*
> > diff --git a/include/linux/mtd/rawnand.h
> > b/include/linux/mtd/rawnand.h
> > index 28c5dce782dd..1b0936fe3c6e 100644
> > --- a/include/linux/mtd/rawnand.h
> > +++ b/include/linux/mtd/rawnand.h
> > @@ -67,6 +67,8 @@ struct gpio_desc;
> >  
> >  /* Extended commands for large page devices */
> >  #define NAND_CMD_READSTART     0x30
> > +#define NAND_CMD_READCACHESEQ  0x31
> > +#define NAND_CMD_READCACHEEND  0x3f
> >  #define NAND_CMD_RNDOUTSTART   0xE0
> >  #define NAND_CMD_CACHEDPROG    0x15
> >  
> > @@ -1099,12 +1101,14 @@ struct nand_controller_ops {
> >   * @supported_op.data_only_read: The controller supports reading
> > more data from
> >   *                     the bus without restarting an entire read
> > operation nor
> >   *                     changing the column.
> > + * @supported_op.cont_read: The controller supports sequential cache
> > reads.
> >   */
> >  struct nand_controller {
> >         struct mutex lock;
> >         const struct nand_controller_ops *ops;
> >         struct {
> >                 unsigned int data_only_read: 1;
> > +               unsigned int cont_read: 1;
> >         } supported_op;
> >  };
> >  
> > @@ -1308,6 +1312,11 @@ struct nand_chip {
> >         int read_retries;
> >         struct nand_secure_region *secure_regions;
> >         u8 nr_secure_regions;
> > +       struct {
> > +               bool ongoing;
> > +               unsigned int first_page;
> > +               unsigned int last_page;
> > +       } cont_read;
> >  
> >         /* Externals */
> >         struct nand_controller *controller;  
>
Martin Hundebøll Sept. 15, 2023, 11:20 a.m. UTC | #24
Hi Miquel,

On Tue, 2023-09-12 at 17:59 +0200, Miquel Raynal wrote:
> Hi Martin,
> 
> martin@geanix.com wrote on Fri, 08 Sep 2023 14:25:59 +0200:
> 
> > Hi Miquel et al.
> > 
> > On Thu, 2023-01-12 at 10:36 +0100, Miquel Raynal wrote:
> > > From: JaimeLiao <jaimeliao.tw@gmail.com>
> > > 
> > > Add support for sequential cache reads for controllers using the
> > > generic
> > > core helpers for their fast read/write helpers.
> > > 
> > > Sequential reads may reduce the overhead when accessing
> > > physically
> > > contiguous data by loading in cache the next page while the
> > > previous
> > > page gets sent out on the NAND bus.
> > > 
> > > The ONFI specification provides the following additional commands
> > > to
> > > handle sequential cached reads:
> > > 
> > > * 0x31 - READ CACHE SEQUENTIAL:
> > >   Requires the NAND chip to load the next page into cache while
> > > keeping
> > >   the current cache available for host reads.
> > > * 0x3F - READ CACHE END:
> > >   Tells the NAND chip this is the end of the sequential cache
> > > read,
> > > the
> > >   current cache shall remain accessible for the host but no more
> > >   internal cache loading operation is required.
> > > 
> > > On the bus, a multi page read operation is currently handled like
> > > this:
> > > 
> > >         00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> > >         00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> > >         00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> > > 
> > > Sequential cached reads may instead be achieved with:
> > > 
> > >         00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> > >                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> > >                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> > >                        3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> > > 
> > > Below are the read speed test results with regular reads and
> > > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping
> > > mode
> > > with
> > > a NAND chip characterized with the following timings:
> > > * tR: 20 µs
> > > * tRCBSY: 5 µs
> > > * tRR: 20 ns
> > > and the following geometry:
> > > * device size: 2 MiB
> > > * eraseblock size: 128 kiB
> > > * page size: 2 kiB
> > > 
> > > ============= Normal read @ 33MHz =================
> > > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> > > mtd_speedtest: page read speed is 15515 KiB/s
> > > mtd_speedtest: 2 page read speed is 15398 KiB/s
> > > ===================================================
> > > 
> > > ========= Sequential cache read @ 33MHz ===========
> > > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> > > mtd_speedtest: page read speed is 15875 KiB/s
> > > mtd_speedtest: 2 page read speed is 16253 KiB/s
> > > ===================================================
> > > 
> > > We observe an overall speed improvement of about 5% when reading
> > > 2 pages, up to 15% when reading an entire block. This is due to
> > > the
> > > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> > > 
> > > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>  
> > 
> > This patch broke our imx6ull system after doing a kernel upgrade:
> > [    2.921886] ubi0: default fastmap pool size: 100
> > [    2.926612] ubi0: default fastmap WL pool size: 50
> > [    2.931421] ubi0: attaching mtd1
> > [    3.515799] ubi0: scanning is finished
> > [    3.525237] ubi0 error: vtbl_check: bad CRC at record 0:
> > 0x88cdfb6,
> > not 0xffffffff
> > [    3.532937] Volume table record 0 dump:
> > [    3.536783]  reserved_pebs   -1
> > [    3.539932]  alignment       -1
> > [    3.543101]  data_pad        -1
> > [    3.546251]  vol_type        255
> > [    3.549485]  upd_marker      255
> > [    3.552746]  name_len        65535
> > [    3.556155]  1st 5 characters of name: 
> > [    3.560429]  crc             0xffffffff
> > [    3.564294] ubi0 error: vtbl_check: bad CRC at record 0:
> > 0x88cdfb6,
> > not 0xffffffff
> > [    3.571892] Volume table record 0 dump:
> > [    3.575754]  reserved_pebs   -1
> > [    3.578906]  alignment       -1
> > [    3.582049]  data_pad        -1
> > [    3.585216]  vol_type        255
> > [    3.588452]  upd_marker      255
> > [    3.591687]  name_len        65535
> > [    3.595108]  1st 5 characters of name: 
> > [    3.599384]  crc             0xffffffff
> > [    3.603285] ubi0 error: ubi_read_volume_table: both volume
> > tables
> > are corrupted
> > [    3.611460] ubi0 error: ubi_attach_mtd_dev: failed to attach
> > mtd1,
> > error -22
> > [    3.618760] UBI error: cannot attach mtd1
> > [    3.622831] UBI: block: can't open volume on ubi0_4, err=-19
> > [    3.628505] UBI: block: can't open volume on ubi0_6, err=-19
> > [    3.634196] UBI: block: can't open volume on ubi0_7, err=-19
> > 
> > It fails consistently at every attach operation. As mentioned
> > above,
> > this is on an i.MX6ULL and a Toshiba NAND chip:
> > [    0.530121] nand: device found, Manufacturer ID: 0x98, Chip ID:
> > 0xdc
> > [    0.530173] nand: Toshiba NAND 512MiB 3,3V 8-bit
> > [    0.530194] nand: 512 MiB, SLC, erase size: 256 KiB, page size:
> > 4096, OOB size: 128
> > 
> > I'm happy to perform experiments to fix this.
> 
> I received two other reports using another controller and two
> different
> chips, we investigated the timings which could be the issue but found
> nothing relevant. I thought it was specific to the controller (or its
> driver) but if you reproduce on imx6 it must be something else.
> Especially since this series was also tested on imx6. So maybe some
> devices do not really support what they advertise? Or they expect
> another sequence? I need to investigate this further but I am a bit
> clueless.
> > 

For what it's worth: I've tested sequential read on an almost identical
board that has a Micron flash instead of the Toshiba one:

[    1.370336] nand: device found, Manufacturer ID: 0x2c, Chip ID: 0xdc
[    1.376830] nand: Micron MT29F4G08ABAFAWP
[    1.380870] nand: 512 MiB, SLC, erase size: 256 KiB, page size:
4096, OOB size: 256


First, ubi seems to be happy:

[    2.702415] ubi0: default fastmap pool size: 100                   
[    2.707138] ubi0: default fastmap WL pool size: 50                 
[    2.711946] ubi0: attaching mtd1                                   
[    3.528830] ubi0: scanning is finished                             
[    3.540626] ubi0: attached mtd1 (name "ubi", size 504 MiB)         
[    3.546235] ubi0: PEB size: 262144 bytes (256 KiB), LEB size: 253952
bytes                                                                 
[    3.553169] ubi0: min./max. I/O unit sizes: 4096/4096, sub-page size
4096                                                                  
[    3.559972] ubi0: VID header offset: 4096 (aligned 4096), data
offset: 8192                                                          
[    3.566968] ubi0: good PEBs: 2012, bad PEBs: 4, corrupted PEBs: 0  
[    3.573096] ubi0: user volume: 9, internal volumes: 1, max. volumes
count: 128                                                            
[    3.580330] ubi0: max/mean erase counter: 5/2, WL threshold: 4096,
image sequence number: 1298270752                                     
[    3.589496] ubi0: available PEBs: 12, total reserved PEBs: 2000,
PEBs reserved for bad PEB handling: 36                                
[    3.598945] ubi0: background thread "ubi_bgt0d" started, PID 35    
[    3.607409] block ubiblock0_4: created from ubi0:4(rootfs.a)       
[    3.615190] block ubiblock0_6: created from ubi0:6(appfs.a)        
[    3.622888] block ubiblock0_7: created from ubi0:7(appfs.b)


But then squashfs fails (we're using ubiblock devices):

[    3.651818] VFS: Mounted root (squashfs filesystem) readonly on
device 254:0.                                                         
[    3.660646] devtmpfs: mounted                                      
[    3.665515] Freeing unused kernel image (initmem) memory: 1024K    
[    3.672816] Run /sbin/init as init process                         
[    3.678097] SQUASHFS error: Unable to read inode 0x8008115d        
[    3.683917] Starting init: /sbin/init exists but couldn't execute it
(error -22)                                                           
[    3.691340] Run /etc/init as init process                          
[    3.697553] Run /bin/init as init process                          
[    3.702982] Run /bin/sh as init process

[    2.702415] ubi0: default fastmap pool size: 100
[    2.707138] ubi0: default fastmap WL pool size: 50
[    2.711946] ubi0: attaching mtd1                                   
[    3.528830] ubi0: scanning is finished                             
[    3.540626] ubi0: attached mtd1 (name "ubi", size 504 MiB)
[    3.546235] ubi0: PEB size: 262144 bytes (256 KiB), LEB size: 253952
bytes
[    3.553169] ubi0: min./max. I/O unit sizes: 4096/4096, sub-page size
4096
[    3.559972] ubi0: VID header offset: 4096 (aligned 4096), data
offset: 8192
[    3.566968] ubi0: good PEBs: 2012, bad PEBs: 4, corrupted PEBs: 0
[    3.573096] ubi0: user volume: 9, internal volumes: 1, max. volumes
count: 128
[    3.580330] ubi0: max/mean erase counter: 5/2, WL threshold: 4096,
image sequence number: 1298270752
[    3.589496] ubi0: available PEBs: 12, total reserved PEBs: 2000,
PEBs reserved for bad PEB handling: 36
[    3.598945] ubi0: background thread "ubi_bgt0d" started, PID 35


I think the patch should be reverted, until we get some more insight.

// Martin

> > // Martin
> > 
> > > ---
> > >  drivers/mtd/nand/raw/nand_base.c | 119
> > > +++++++++++++++++++++++++++++--
> > >  include/linux/mtd/rawnand.h      |   9 +++
> > >  2 files changed, 124 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/mtd/nand/raw/nand_base.c
> > > b/drivers/mtd/nand/raw/nand_base.c
> > > index 34395d5d3a47..0b1fd6bbb36b 100644
> > > --- a/drivers/mtd/nand/raw/nand_base.c
> > > +++ b/drivers/mtd/nand/raw/nand_base.c
> > > @@ -1208,6 +1208,73 @@ static int
> > > nand_lp_exec_read_page_op(struct
> > > nand_chip *chip, unsigned int page,
> > >         return nand_exec_op(chip, &op);
> > >  }
> > >  
> > > +static int nand_lp_exec_cont_read_page_op(struct nand_chip
> > > *chip,
> > > unsigned int page,
> > > +                                         unsigned int
> > > offset_in_page, void *buf,
> > > +                                         unsigned int len, bool
> > > check_only)
> > > +{
> > > +       const struct nand_interface_config *conf =
> > > +               nand_get_interface_config(chip);
> > > +       u8 addrs[5];
> > > +       struct nand_op_instr start_instrs[] = {
> > > +               NAND_OP_CMD(NAND_CMD_READ0, 0),
> > > +               NAND_OP_ADDR(4, addrs, 0),
> > > +               NAND_OP_CMD(NAND_CMD_READSTART,
> > > NAND_COMMON_TIMING_NS(conf, tWB_max)),
> > > +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf,
> > > tR_max),
> > > 0),
> > > +               NAND_OP_CMD(NAND_CMD_READCACHESEQ,
> > > NAND_COMMON_TIMING_NS(conf, tWB_max)),
> > > +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf,
> > > tR_max),
> > > +                                NAND_COMMON_TIMING_NS(conf,
> > > tRR_min)),
> > > +               NAND_OP_DATA_IN(len, buf, 0),
> > > +       };
> > > +       struct nand_op_instr cont_instrs[] = {
> > > +               NAND_OP_CMD(page == chip->cont_read.last_page ?
> > > +                           NAND_CMD_READCACHEEND :
> > > NAND_CMD_READCACHESEQ,
> > > +                           NAND_COMMON_TIMING_NS(conf,
> > > tWB_max)),
> > > +               NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf,
> > > tR_max),
> > > +                                NAND_COMMON_TIMING_NS(conf,
> > > tRR_min)),
> > > +               NAND_OP_DATA_IN(len, buf, 0),
> > > +       };
> > > +       struct nand_operation start_op = NAND_OPERATION(chip-
> > > >cur_cs,
> > > start_instrs);
> > > +       struct nand_operation cont_op = NAND_OPERATION(chip-
> > > >cur_cs,
> > > cont_instrs);
> > > +       int ret;
> > > +
> > > +       if (!len) {
> > > +               start_op.ninstrs--;
> > > +               cont_op.ninstrs--;
> > > +       }
> > > +
> > > +       ret = nand_fill_column_cycles(chip, addrs,
> > > offset_in_page);
> > > +       if (ret < 0)
> > > +               return ret;
> > > +
> > > +       addrs[2] = page;
> > > +       addrs[3] = page >> 8;
> > > +
> > > +       if (chip->options & NAND_ROW_ADDR_3) {
> > > +               addrs[4] = page >> 16;
> > > +               start_instrs[1].ctx.addr.naddrs++;
> > > +       }
> > > +
> > > +       /* Check if cache reads are supported */
> > > +       if (check_only) {
> > > +               if (nand_check_op(chip, &start_op) ||
> > > nand_check_op(chip, &cont_op))
> > > +                       return -EOPNOTSUPP;
> > > +
> > > +               return 0;
> > > +       }
> > > +
> > > +       if (page == chip->cont_read.first_page)
> > > +               return nand_exec_op(chip, &start_op);
> > > +       else
> > > +               return nand_exec_op(chip, &cont_op);
> > > +}
> > > +
> > > +static bool rawnand_cont_read_ongoing(struct nand_chip *chip,
> > > unsigned int page)
> > > +{
> > > +       return chip->cont_read.ongoing &&
> > > +               page >= chip->cont_read.first_page &&
> > > +               page <= chip->cont_read.last_page;
> > > +}
> > > +
> > >  /**
> > >   * nand_read_page_op - Do a READ PAGE operation
> > >   * @chip: The NAND chip
> > > @@ -1233,10 +1300,16 @@ int nand_read_page_op(struct nand_chip
> > > *chip,
> > > unsigned int page,
> > >                 return -EINVAL;
> > >  
> > >         if (nand_has_exec_op(chip)) {
> > > -               if (mtd->writesize > 512)
> > > -                       return nand_lp_exec_read_page_op(chip,
> > > page,
> > > -                                                       
> > > offset_in_page, buf,
> > > -                                                        len);
> > > +               if (mtd->writesize > 512) {
> > > +                       if (rawnand_cont_read_ongoing(chip,
> > > page))
> > > +                               return
> > > nand_lp_exec_cont_read_page_op(chip, page,
> > > +                                                                
> > >     
> > > offset_in_page,
> > > +                                                                
> > >     
> > > buf, len, false);
> > > +                       else
> > > +                               return
> > > nand_lp_exec_read_page_op(chip, page,
> > > +                                                               
> > > offset_in_page, buf,
> > > +                                                               
> > > len);
> > > +               }
> > >  
> > >                 return nand_sp_exec_read_page_op(chip, page,
> > > offset_in_page,
> > >                                                  buf, len);
> > > @@ -3353,6 +3426,27 @@ static uint8_t *nand_transfer_oob(struct
> > > nand_chip *chip, uint8_t *oob,
> > >         return NULL;
> > >  }
> > >  
> > > +static void rawnand_enable_cont_reads(struct nand_chip *chip,
> > > unsigned int page,
> > > +                                     u32 readlen, int col)
> > > +{
> > > +       struct mtd_info *mtd = nand_to_mtd(chip);
> > > +
> > > +       if (!chip->controller->supported_op.cont_read)
> > > +               return;
> > > +
> > > +       if ((col && col + readlen < (3 * mtd->writesize)) ||
> > > +           (!col && readlen < (2 * mtd->writesize))) {
> > > +               chip->cont_read.ongoing = false;
> > > +               return;
> > > +       }
> > > +
> > > +       chip->cont_read.ongoing = true;
> > > +       chip->cont_read.first_page = page;
> > > +       if (col)
> > > +               chip->cont_read.first_page++;
> > > +       chip->cont_read.last_page = page + ((readlen >> chip-  
> > > > page_shift) & chip->pagemask);  
> > > +}
> > > +
> > >  /**
> > >   * nand_setup_read_retry - [INTERN] Set the READ RETRY mode
> > >   * @chip: NAND chip object
> > > @@ -3426,6 +3520,8 @@ static int nand_do_read_ops(struct
> > > nand_chip
> > > *chip, loff_t from,
> > >         oob = ops->oobbuf;
> > >         oob_required = oob ? 1 : 0;
> > >  
> > > +       rawnand_enable_cont_reads(chip, page, readlen, col);
> > > +
> > >         while (1) {
> > >                 struct mtd_ecc_stats ecc_stats = mtd->ecc_stats;
> > >  
> > > @@ -5009,12 +5105,27 @@ static void
> > > rawnand_early_check_supported_ops(struct nand_chip *chip)
> > >         rawnand_check_data_only_read_support(chip);
> > >  }
> > >  
> > > +static void rawnand_check_cont_read_support(struct nand_chip
> > > *chip)
> > > +{
> > > +       struct mtd_info *mtd = nand_to_mtd(chip);
> > > +
> > > +       if (chip->read_retries)
> > > +               return;
> > > +
> > > +       if (!nand_lp_exec_cont_read_page_op(chip, 0, 0, NULL,
> > > +                                           mtd->writesize,
> > > true))
> > > +               chip->controller->supported_op.cont_read = 1;
> > > +}
> > > +
> > >  static void rawnand_late_check_supported_ops(struct nand_chip
> > > *chip)
> > >  {
> > >         /* The supported_op fields should not be set by
> > > individual
> > > drivers */
> > > +       WARN_ON_ONCE(chip->controller->supported_op.cont_read);
> > >  
> > >         if (!nand_has_exec_op(chip))
> > >                 return;
> > > +
> > > +       rawnand_check_cont_read_support(chip);
> > >  }
> > >  
> > >  /*
> > > diff --git a/include/linux/mtd/rawnand.h
> > > b/include/linux/mtd/rawnand.h
> > > index 28c5dce782dd..1b0936fe3c6e 100644
> > > --- a/include/linux/mtd/rawnand.h
> > > +++ b/include/linux/mtd/rawnand.h
> > > @@ -67,6 +67,8 @@ struct gpio_desc;
> > >  
> > >  /* Extended commands for large page devices */
> > >  #define NAND_CMD_READSTART     0x30
> > > +#define NAND_CMD_READCACHESEQ  0x31
> > > +#define NAND_CMD_READCACHEEND  0x3f
> > >  #define NAND_CMD_RNDOUTSTART   0xE0
> > >  #define NAND_CMD_CACHEDPROG    0x15
> > >  
> > > @@ -1099,12 +1101,14 @@ struct nand_controller_ops {
> > >   * @supported_op.data_only_read: The controller supports reading
> > > more data from
> > >   *                     the bus without restarting an entire read
> > > operation nor
> > >   *                     changing the column.
> > > + * @supported_op.cont_read: The controller supports sequential
> > > cache
> > > reads.
> > >   */
> > >  struct nand_controller {
> > >         struct mutex lock;
> > >         const struct nand_controller_ops *ops;
> > >         struct {
> > >                 unsigned int data_only_read: 1;
> > > +               unsigned int cont_read: 1;
> > >         } supported_op;
> > >  };
> > >  
> > > @@ -1308,6 +1312,11 @@ struct nand_chip {
> > >         int read_retries;
> > >         struct nand_secure_region *secure_regions;
> > >         u8 nr_secure_regions;
> > > +       struct {
> > > +               bool ongoing;
> > > +               unsigned int first_page;
> > > +               unsigned int last_page;
> > > +       } cont_read;
> > >  
> > >         /* Externals */
> > >         struct nand_controller *controller;  
> >
Miquel Raynal Sept. 22, 2023, 9:20 a.m. UTC | #25
Hi Martin,

martin@geanix.com wrote on Fri, 15 Sep 2023 13:20:10 +0200:

> Hi Miquel,
> 
> On Tue, 2023-09-12 at 17:59 +0200, Miquel Raynal wrote:
> > Hi Martin,
> > 
> > martin@geanix.com wrote on Fri, 08 Sep 2023 14:25:59 +0200:
> >   
> > > Hi Miquel et al.
> > > 
> > > On Thu, 2023-01-12 at 10:36 +0100, Miquel Raynal wrote:  
> > > > From: JaimeLiao <jaimeliao.tw@gmail.com>
> > > > 
> > > > Add support for sequential cache reads for controllers using the
> > > > generic
> > > > core helpers for their fast read/write helpers.
> > > > 
> > > > Sequential reads may reduce the overhead when accessing
> > > > physically
> > > > continuous data by loading in cache the next page while the
> > > > previous
> > > > page gets sent out on the NAND bus.
> > > > 
> > > > The ONFI specification provides the following additional commands
> > > > to
> > > > handle sequential cached reads:
> > > > 
> > > > * 0x31 - READ CACHE SEQUENTIAL:
> > > >   Requires the NAND chip to load the next page into cache while
> > > > keeping
> > > >   the current cache available for host reads.
> > > > * 0x3F - READ CACHE END:
> > > >   Tells the NAND chip this is the end of the sequential cache
> > > > read,
> > > > the
> > > >   current cache shall remain accessible for the host but no more
> > > >   internal cache loading operation is required.
> > > > 
> > > > On the bus, a multi page read operation is currently handled like
> > > > this:
> > > > 
> > > >         00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN
> > > >         00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN
> > > >         00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN
> > > > 
> > > > Sequential cached reads may instead be achieved with:
> > > > 
> > > >         00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \
> > > >                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \
> > > >                        31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \
> > > >                        3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN
> > > > 
> > > > Below are the read speed test results with regular reads and
> > > > sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping
> > > > mode
> > > > with
> > > > a NAND chip characterized with the following timings:
> > > > * tR: 20 µs
> > > > * tRCBSY: 5 µs
> > > > * tRR: 20 ns
> > > > and the following geometry:
> > > > * device size: 2 MiB
> > > > * eraseblock size: 128 kiB
> > > > * page size: 2 kiB
> > > > 
> > > > ============= Normal read @ 33MHz =================
> > > > mtd_speedtest: eraseblock read speed is 15633 KiB/s
> > > > mtd_speedtest: page read speed is 15515 KiB/s
> > > > mtd_speedtest: 2 page read speed is 15398 KiB/s
> > > > ===================================================
> > > > 
> > > > ========= Sequential cache read @ 33MHz ===========
> > > > mtd_speedtest: eraseblock read speed is 18285 KiB/s
> > > > mtd_speedtest: page read speed is 15875 KiB/s
> > > > mtd_speedtest: 2 page read speed is 16253 KiB/s
> > > > ===================================================
> > > > 
> > > > We observe an overall speed improvement of about 5% when reading
> > > > 2 pages, up to 15% when reading an entire block. This is due to
> > > > the
> > > > ~14us gain on each additional page read (tR - (tRCBSY + tRR)).
> > > > 
> > > > Co-developed-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > > > Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> > > > Signed-off-by: JaimeLiao <jaimeliao.tw@gmail.com>    
> > > 
> > > This patch broke our imx6ull system after doing a kernel upgrade:
> > > [    2.921886] ubi0: default fastmap pool size: 100
> > > [    2.926612] ubi0: default fastmap WL pool size: 50
> > > [    2.931421] ubi0: attaching mtd1
> > > [    3.515799] ubi0: scanning is finished
> > > [    3.525237] ubi0 error: vtbl_check: bad CRC at record 0:
> > > 0x88cdfb6,
> > > not 0xffffffff
> > > [    3.532937] Volume table record 0 dump:
> > > [    3.536783]  reserved_pebs   -1
> > > [    3.539932]  alignment       -1
> > > [    3.543101]  data_pad        -1
> > > [    3.546251]  vol_type        255
> > > [    3.549485]  upd_marker      255
> > > [    3.552746]  name_len        65535
> > > [    3.556155]  1st 5 characters of name: 
> > > [    3.560429]  crc             0xffffffff
> > > [    3.564294] ubi0 error: vtbl_check: bad CRC at record 0:
> > > 0x88cdfb6,
> > > not 0xffffffff
> > > [    3.571892] Volume table record 0 dump:
> > > [    3.575754]  reserved_pebs   -1
> > > [    3.578906]  alignment       -1
> > > [    3.582049]  data_pad        -1
> > > [    3.585216]  vol_type        255
> > > [    3.588452]  upd_marker      255
> > > [    3.591687]  name_len        65535
> > > [    3.595108]  1st 5 characters of name: 
> > > [    3.599384]  crc             0xffffffff
> > > [    3.603285] ubi0 error: ubi_read_volume_table: both volume
> > > tables
> > > are corrupted
> > > [    3.611460] ubi0 error: ubi_attach_mtd_dev: failed to attach
> > > mtd1,
> > > error -22
> > > [    3.618760] UBI error: cannot attach mtd1
> > > [    3.622831] UBI: block: can't open volume on ubi0_4, err=-19
> > > [    3.628505] UBI: block: can't open volume on ubi0_6, err=-19
> > > [    3.634196] UBI: block: can't open volume on ubi0_7, err=-19
> > > 
> > > It fails consistently at every attach operation. As mentioned
> > > above,
> > > this is on an i.MX6ULL and a Toshiba NAND chip:
> > > [    0.530121] nand: device found, Manufacturer ID: 0x98, Chip ID:
> > > 0xdc
> > > [    0.530173] nand: Toshiba NAND 512MiB 3,3V 8-bit
> > > [    0.530194] nand: 512 MiB, SLC, erase size: 256 KiB, page size:
> > > 4096, OOB size: 128
> > > 
> > > I'm happy to perform experiments to fix this.  
> > 
> > I received two other reports using another controller and two
> > different
> > chips, we investigated the timings which could be the issue but found
> > nothing relevant. I thought it was specific to the controller (or its
> > driver) but if you reproduce on imx6 it must be something else.
> > Especially since this series was also tested on imx6. So maybe some
> > devices do not really support what they advertise? Or they expect
> > another sequence? I need to investigate this further but I am a bit
> > clueless.  
> > >   
> 
> For what it's worth: I've tested sequential read on an almost identical
> board that has a Micron flash instead of the Toshiba one:
> 
> [    1.370336] nand: device found, Manufacturer ID: 0x2c, Chip ID: 0xdc
> [    1.376830] nand: Micron MT29F4G08ABAFAWP
> [    1.380870] nand: 512 MiB, SLC, erase size: 256 KiB, page size:
> 4096, OOB size: 256
> 
> 
> First, ubi seems to be happy:
> 
> [    2.702415] ubi0: default fastmap pool size: 100                   
> [    2.707138] ubi0: default fastmap WL pool size: 50                 
> [    2.711946] ubi0: attaching mtd1                                   
> [    3.528830] ubi0: scanning is finished                             
> [    3.540626] ubi0: attached mtd1 (name "ubi", size 504 MiB)         
> [    3.546235] ubi0: PEB size: 262144 bytes (256 KiB), LEB size: 253952
> bytes                                                                 
> [    3.553169] ubi0: min./max. I/O unit sizes: 4096/4096, sub-page size
> 4096                                                                  
> [    3.559972] ubi0: VID header offset: 4096 (aligned 4096), data
> offset: 8192                                                          
> [    3.566968] ubi0: good PEBs: 2012, bad PEBs: 4, corrupted PEBs: 0  
> [    3.573096] ubi0: user volume: 9, internal volumes: 1, max. volumes
> count: 128                                                            
> [    3.580330] ubi0: max/mean erase counter: 5/2, WL threshold: 4096,
> image sequence number: 1298270752                                     
> [    3.589496] ubi0: available PEBs: 12, total reserved PEBs: 2000,
> PEBs reserved for bad PEB handling: 36                                
> [    3.598945] ubi0: background thread "ubi_bgt0d" started, PID 35    
> [    3.607409] block ubiblock0_4: created from ubi0:4(rootfs.a)       
> [    3.615190] block ubiblock0_6: created from ubi0:6(appfs.a)        
> [    3.622888] block ubiblock0_7: created from ubi0:7(appfs.b)
> 
> 
> But then squashfs fails (we're using ubiblock devices):
> 
> [    3.651818] VFS: Mounted root (squashfs filesystem) readonly on
> device 254:0.                                                         
> [    3.660646] devtmpfs: mounted                                      
> [    3.665515] Freeing unused kernel image (initmem) memory: 1024K    
> [    3.672816] Run /sbin/init as init process                         
> [    3.678097] SQUASHFS error: Unable to read inode 0x8008115d        
> [    3.683917] Starting init: /sbin/init exists but couldn't execute it
> (error -22)                                                           
> [    3.691340] Run /etc/init as init process                          
> [    3.697553] Run /bin/init as init process                          
> [    3.702982] Run /bin/sh as init process
> 
> [    2.702415] ubi0: default fastmap pool size: 100
> [    2.707138] ubi0: default fastmap WL pool size: 50
> [    2.711946] ubi0: attaching mtd1                                   
> [    3.528830] ubi0: scanning is finished                             
> [    3.540626] ubi0: attached mtd1 (name "ubi", size 504 MiB)
> [    3.546235] ubi0: PEB size: 262144 bytes (256 KiB), LEB size: 253952
> bytes
> [    3.553169] ubi0: min./max. I/O unit sizes: 4096/4096, sub-page size
> 4096
> [    3.559972] ubi0: VID header offset: 4096 (aligned 4096), data
> offset: 8192
> [    3.566968] ubi0: good PEBs: 2012, bad PEBs: 4, corrupted PEBs: 0
> [    3.573096] ubi0: user volume: 9, internal volumes: 1, max. volumes
> count: 128
> [    3.580330] ubi0: max/mean erase counter: 5/2, WL threshold: 4096,
> image sequence number: 1298270752
> [    3.589496] ubi0: available PEBs: 12, total reserved PEBs: 2000,
> PEBs reserved for bad PEB handling: 36
> [    3.598945] ubi0: background thread "ubi_bgt0d" started, PID 35

Can you show me the errors? Did you look at the raw flash content —
is there anything special to be seen?

> 
> I think the patch should be reverted, until we get some more insight.
> 

I cannot make any sense of all the different feedback I've got (good
and bad). I will revert the patch for now.

Thanks,
Miquèl
diff mbox series

Patch

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 34395d5d3a47..0b1fd6bbb36b 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1208,6 +1208,73 @@  static int nand_lp_exec_read_page_op(struct nand_chip *chip, unsigned int page,
 	return nand_exec_op(chip, &op);
 }
 
+static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int page,
+					  unsigned int offset_in_page, void *buf,
+					  unsigned int len, bool check_only)
+{
+	const struct nand_interface_config *conf =
+		nand_get_interface_config(chip);
+	u8 addrs[5];
+	struct nand_op_instr start_instrs[] = {
+		NAND_OP_CMD(NAND_CMD_READ0, 0),
+		NAND_OP_ADDR(4, addrs, 0),
+		NAND_OP_CMD(NAND_CMD_READSTART, NAND_COMMON_TIMING_NS(conf, tWB_max)),
+		NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max), 0),
+		NAND_OP_CMD(NAND_CMD_READCACHESEQ, NAND_COMMON_TIMING_NS(conf, tWB_max)),
+		NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
+				 NAND_COMMON_TIMING_NS(conf, tRR_min)),
+		NAND_OP_DATA_IN(len, buf, 0),
+	};
+	struct nand_op_instr cont_instrs[] = {
+		NAND_OP_CMD(page == chip->cont_read.last_page ?
+			    NAND_CMD_READCACHEEND : NAND_CMD_READCACHESEQ,
+			    NAND_COMMON_TIMING_NS(conf, tWB_max)),
+		NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max),
+				 NAND_COMMON_TIMING_NS(conf, tRR_min)),
+		NAND_OP_DATA_IN(len, buf, 0),
+	};
+	struct nand_operation start_op = NAND_OPERATION(chip->cur_cs, start_instrs);
+	struct nand_operation cont_op = NAND_OPERATION(chip->cur_cs, cont_instrs);
+	int ret;
+
+	if (!len) {
+		start_op.ninstrs--;
+		cont_op.ninstrs--;
+	}
+
+	ret = nand_fill_column_cycles(chip, addrs, offset_in_page);
+	if (ret < 0)
+		return ret;
+
+	addrs[2] = page;
+	addrs[3] = page >> 8;
+
+	if (chip->options & NAND_ROW_ADDR_3) {
+		addrs[4] = page >> 16;
+		start_instrs[1].ctx.addr.naddrs++;
+	}
+
+	/* Check if cache reads are supported */
+	if (check_only) {
+		if (nand_check_op(chip, &start_op) || nand_check_op(chip, &cont_op))
+			return -EOPNOTSUPP;
+
+		return 0;
+	}
+
+	if (page == chip->cont_read.first_page)
+		return nand_exec_op(chip, &start_op);
+	else
+		return nand_exec_op(chip, &cont_op);
+}
+
+static bool rawnand_cont_read_ongoing(struct nand_chip *chip, unsigned int page)
+{
+	return chip->cont_read.ongoing &&
+		page >= chip->cont_read.first_page &&
+		page <= chip->cont_read.last_page;
+}
+
 /**
  * nand_read_page_op - Do a READ PAGE operation
  * @chip: The NAND chip
@@ -1233,10 +1300,16 @@  int nand_read_page_op(struct nand_chip *chip, unsigned int page,
 		return -EINVAL;
 
 	if (nand_has_exec_op(chip)) {
-		if (mtd->writesize > 512)
-			return nand_lp_exec_read_page_op(chip, page,
-							 offset_in_page, buf,
-							 len);
+		if (mtd->writesize > 512) {
+			if (rawnand_cont_read_ongoing(chip, page))
+				return nand_lp_exec_cont_read_page_op(chip, page,
+								      offset_in_page,
+								      buf, len, false);
+			else
+				return nand_lp_exec_read_page_op(chip, page,
+								 offset_in_page, buf,
+								 len);
+		}
 
 		return nand_sp_exec_read_page_op(chip, page, offset_in_page,
 						 buf, len);
@@ -3353,6 +3426,27 @@  static uint8_t *nand_transfer_oob(struct nand_chip *chip, uint8_t *oob,
 	return NULL;
 }
 
+static void rawnand_enable_cont_reads(struct nand_chip *chip, unsigned int page,
+				      u32 readlen, int col)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	if (!chip->controller->supported_op.cont_read)
+		return;
+
+	if ((col && col + readlen < (3 * mtd->writesize)) ||
+	    (!col && readlen < (2 * mtd->writesize))) {
+		chip->cont_read.ongoing = false;
+		return;
+	}
+
+	chip->cont_read.ongoing = true;
+	chip->cont_read.first_page = page;
+	if (col)
+		chip->cont_read.first_page++;
+	chip->cont_read.last_page = page + ((readlen >> chip->page_shift) & chip->pagemask);
+}
+
 /**
  * nand_setup_read_retry - [INTERN] Set the READ RETRY mode
  * @chip: NAND chip object
@@ -3426,6 +3520,8 @@  static int nand_do_read_ops(struct nand_chip *chip, loff_t from,
 	oob = ops->oobbuf;
 	oob_required = oob ? 1 : 0;
 
+	rawnand_enable_cont_reads(chip, page, readlen, col);
+
 	while (1) {
 		struct mtd_ecc_stats ecc_stats = mtd->ecc_stats;
 
@@ -5009,12 +5105,27 @@  static void rawnand_early_check_supported_ops(struct nand_chip *chip)
 	rawnand_check_data_only_read_support(chip);
 }
 
+static void rawnand_check_cont_read_support(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	if (chip->read_retries)
+		return;
+
+	if (!nand_lp_exec_cont_read_page_op(chip, 0, 0, NULL,
+					    mtd->writesize, true))
+		chip->controller->supported_op.cont_read = 1;
+}
+
 static void rawnand_late_check_supported_ops(struct nand_chip *chip)
 {
 	/* The supported_op fields should not be set by individual drivers */
+	WARN_ON_ONCE(chip->controller->supported_op.cont_read);
 
 	if (!nand_has_exec_op(chip))
 		return;
+
+	rawnand_check_cont_read_support(chip);
 }
 
 /*
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 28c5dce782dd..1b0936fe3c6e 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -67,6 +67,8 @@  struct gpio_desc;
 
 /* Extended commands for large page devices */
 #define NAND_CMD_READSTART	0x30
+#define NAND_CMD_READCACHESEQ	0x31
+#define NAND_CMD_READCACHEEND	0x3f
 #define NAND_CMD_RNDOUTSTART	0xE0
 #define NAND_CMD_CACHEDPROG	0x15
 
@@ -1099,12 +1101,14 @@  struct nand_controller_ops {
  * @supported_op.data_only_read: The controller supports reading more data from
  *			the bus without restarting an entire read operation nor
  *			changing the column.
+ * @supported_op.cont_read: The controller supports sequential cache reads.
  */
 struct nand_controller {
 	struct mutex lock;
 	const struct nand_controller_ops *ops;
 	struct {
 		unsigned int data_only_read: 1;
+		unsigned int cont_read: 1;
 	} supported_op;
 };
 
@@ -1308,6 +1312,11 @@  struct nand_chip {
 	int read_retries;
 	struct nand_secure_region *secure_regions;
 	u8 nr_secure_regions;
+	struct {
+		bool ongoing;
+		unsigned int first_page;
+		unsigned int last_page;
+	} cont_read;
 
 	/* Externals */
 	struct nand_controller *controller;