diff mbox series

[v6,2/2] mtd: rawnand: meson: add support for Amlogic NAND flash controller

Message ID 1541090542-19618-3-git-send-email-jianxin.pan@amlogic.com
State Superseded
Delegated to: Miquel Raynal
Headers show
Series mtd: rawnand: meson: add Amlogic NAND driver support | expand

Commit Message

Jianxin Pan Nov. 1, 2018, 4:42 p.m. UTC
From: Liang Yang <liang.yang@amlogic.com>

Add initial support for the Amlogic NAND flash controller which found
in the Meson-GXBB/GXL/AXG SoCs.

Signed-off-by: Liang Yang <liang.yang@amlogic.com>
Signed-off-by: Yixun Lan <yixun.lan@amlogic.com>
Signed-off-by: Jianxin Pan <jianxin.pan@amlogic.com>
---
 drivers/mtd/nand/raw/Kconfig      |   10 +
 drivers/mtd/nand/raw/Makefile     |    1 +
 drivers/mtd/nand/raw/meson_nand.c | 1360 +++++++++++++++++++++++++++++++++++++
 3 files changed, 1371 insertions(+)
 create mode 100644 drivers/mtd/nand/raw/meson_nand.c

Comments

Boris Brezillon Nov. 5, 2018, 3:53 p.m. UTC | #1
On Fri, 2 Nov 2018 00:42:21 +0800
Jianxin Pan <jianxin.pan@amlogic.com> wrote:

> +#define NFC_REG_CMD		0x00
> +#define NFC_CMD_DRD		(0x8 << 14)
> +#define NFC_CMD_IDLE		(0xc << 14)
> +#define NFC_CMD_DWR		(0x4 << 14)
> +#define NFC_CMD_CLE		(0x5 << 14)
> +#define NFC_CMD_ALE		(0x6 << 14)
> +#define NFC_CMD_ADL		((0 << 16) | (3 << 20))
> +#define NFC_CMD_ADH		((1 << 16) | (3 << 20))
> +#define NFC_CMD_AIL		((2 << 16) | (3 << 20))
> +#define NFC_CMD_AIH		((3 << 16) | (3 << 20))
> +#define NFC_CMD_SEED		((8 << 16) | (3 << 20))
> +#define NFC_CMD_M2N		((0 << 17) | (2 << 20))
> +#define NFC_CMD_N2M		((1 << 17) | (2 << 20))
> +#define NFC_CMD_RB		BIT(20)
> +#define NFC_CMD_IO6		((0xb << 10) | (1 << 18))
> +#define NFC_CMD_SCRAMBLER_ENABLE	BIT(19)
> +#define NFC_CMD_RB_INT		BIT(14)
> +
> +#define NFC_CMD_GET_SIZE(x)	(((x) >> 22) & GENMASK(4, 0))
> +#define NFC_CMD_RB_TIMEOUT	0x18

Where does this value come from? Is it the typical timeout value,
and if it is, what's the value in milli/microseconds?

> +
> +#define NFC_REG_CFG		0x04
> +#define NFC_REG_DADR		0x08
> +#define NFC_REG_IADR		0x0c
> +#define NFC_REG_BUF		0x10
> +#define NFC_REG_INFO		0x14
> +#define NFC_REG_DC		0x18
> +#define NFC_REG_ADR		0x1c
> +#define NFC_REG_DL		0x20
> +#define NFC_REG_DH		0x24
> +#define NFC_REG_CADR		0x28
> +#define NFC_REG_SADR		0x2c
> +#define NFC_REG_PINS		0x30
> +#define NFC_REG_VER		0x38
> +
> +#define NFC_RB_IRQ_EN		BIT(21)
> +#define NFC_INT_MASK		(3 << 20)
> +
> +#define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages)	\
> +	(								\
> +		(cmd_dir)			|			\
> +		((ran) << 19)			|			\
> +		((bch) << 14)			|			\
> +		((short_mode) << 13)		|			\
> +		(((page_size) & 0x7f) << 6)	|			\
> +		((pages) & 0x3f)					\
> +	)
> +
> +#define GENCMDDADDRL(adl, addr)		((adl) | ((addr) & 0xffff))
> +#define GENCMDDADDRH(adh, addr)		((adh) | (((addr) >> 16) & 0xffff))
> +#define GENCMDIADDRL(ail, addr)		((ail) | ((addr) & 0xffff))
> +#define GENCMDIADDRH(aih, addr)		((aih) | (((addr) >> 16) & 0xffff))
> +
> +#define RB_STA(x)		(1 << (26 + (x)))
> +#define DMA_DIR(dir)		((dir) ? NFC_CMD_N2M : NFC_CMD_M2N)
> +
> +#define ECC_CHECK_RETURN_FF	(-1)
> +
> +#define NAND_CE0		(0xe << 10)
> +#define NAND_CE1		(0xd << 10)
> +
> +#define DMA_BUSY_TIMEOUT	0x100000
> +#define CMD_FIFO_EMPTY_TIMEOUT	1000
> +
> +#define MAX_CE_NUM		2
> +
> +/* eMMC clock register, misc control */
> +#define SD_EMMC_CLOCK		0x00
> +#define CLK_ALWAYS_ON		BIT(28)
> +#define CLK_SELECT_NAND		BIT(31)
> +#define CLK_DIV_MASK		GENMASK(5, 0)
> +
> +#define NFC_CLK_CYCLE		6
> +
> +/* nand flash controller delay 3 ns */
> +#define NFC_DEFAULT_DELAY	3000
> +
> +#define MAX_ECC_INDEX		10
> +
> +#define MUX_CLK_NUM_PARENTS	2
> +
> +#define ROW_ADDER(page, index)	(((page) >> (8 * (index))) & 0xff)
> +#define MAX_CYCLE_ROW_ADDRS	3
> +#define MAX_CYCLE_COLUMN_ADDRS	2
> +#define DIRREAD			1
> +#define DIRWRITE		0
> +
> +#define ECC_PARITY_BCH8_512B	14
> +
> +#define PER_INFO_BYTE		8
> +#define ECC_GET_PROTECTED_OOB_BYTE(x, y)			\
> +		((x) >> (8 * (y)) & GENMASK(7, 0))

		(le64_to_cpu(x) >> (8 * (y)) & GENMASK(7, 0))

> +
> +#define ECC_SET_PROTECTED_OOB_BYTE(x, y, z)			\
> +	{							\
> +		(x) &= (~((__le64)(0xff) << (8 * (y))));	\

It's probably better to memset(0) the info_buf so that you can drop
this masking step.

> +		(x) |= ((__le64)(z) << (8 * (y)));		\

		    |= cpu_to_le64((u64)z << (8 * (y)));

> +	}
> +
> +#define ECC_COMPLETE            BIT(31)
> +#define ECC_ERR_CNT(x)		(((x) >> 24) & GENMASK(5, 0))
> +#define ECC_ZERO_CNT(x)		(((x) >> 16) & GENMASK(5, 0))
> +
> +struct meson_nfc_nand_chip {
> +	struct list_head node;
> +	struct nand_chip nand;
> +	int clk_rate;
> +	int level1_divider;
> +	int bus_timing;
> +	int twb;
> +	int tadl;
> +
> +	int bch_mode;
> +	u8 *data_buf;
> +	__le64 *info_buf;
> +	int nsels;
> +	u8 sels[0];
> +};

Please use unsigned integers where having a negative value is not
possible. I'm pretty sure this is the case of all int fields in this
struct. The same applies to the other structs.

[...]

> +
> +static int meson_nfc_wait_cmd_finish(struct meson_nfc *nfc,
> +				     unsigned int timeout_ms)
> +{
> +	u32 cmd_size = 0;
> +	int ret;
> +
> +	/* wait cmd fifo is empty */
> +	ret = readl_poll_timeout(nfc->reg_base + NFC_REG_CMD, cmd_size,
> +				 !NFC_CMD_GET_SIZE(cmd_size),
> +				 10, timeout_ms * 1000);

I guess you could use the relaxed version here.

> +	if (ret)
> +		dev_err(nfc->dev, "wait for empty cmd FIFO time out\n");
> +
> +	return ret;
> +}
> +
> +static int meson_nfc_wait_dma_finish(struct meson_nfc *nfc)
> +{
> +	meson_nfc_drain_cmd(nfc);
> +
> +	return meson_nfc_wait_cmd_finish(nfc, DMA_BUSY_TIMEOUT);
> +}
> +
> +static inline __le64 nfc_info_ptr(struct nand_chip *nand, int index)

Drop inline specifiers for things that are defined in .c files. The
compiler should be smart enough to determine when inlining is
appropriate.

> +{
> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
> +
> +	return le64_to_cpu(meson_chip->info_buf[index]);

According to the function prototype, you should return
meson_chip->info_buf[index] directly, but I'm not even sure why you
need this helper. Just access meson_chip->info_buf[] directly.

> +}
> +

[...]

> +
> +static int meson_nfc_queue_rb(struct meson_nfc *nfc, int timeout_ms)
> +{
> +	u32 cmd, cfg;
> +	int ret = 0;
> +
> +	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
> +	meson_nfc_drain_cmd(nfc);
> +	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);
> +
> +	cfg = readl(nfc->reg_base + NFC_REG_CFG);
> +	cfg |= (1 << 21);
> +	writel(cfg, nfc->reg_base + NFC_REG_CFG);
> +
> +	init_completion(&nfc->completion);
> +
> +	cmd = NFC_CMD_RB | NFC_CMD_RB_INT
> +		| nfc->param.chip_select | NFC_CMD_RB_TIMEOUT;
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +
> +	ret = wait_for_completion_timeout(&nfc->completion,
> +					  msecs_to_jiffies(timeout_ms));
> +	if (ret == 0) {
> +		dev_err(nfc->dev, "wait nand irq timeout\n");
> +		ret = -1;
> +	}
> +	return ret;
> +}
> +
> +static void meson_nfc_set_user_byte(struct nand_chip *nand, u8 *oob_buf)

	       ^ meson_nfc_set_protected_oob_bytes()

> +{
> +	__le64 info;
> +	int i, count;
> +
> +	for (i = 0, count = 0; i < nand->ecc.steps; i++, count += 2) {
> +		info = nfc_info_ptr(nand, i);
> +		ECC_SET_PROTECTED_OOB_BYTE(info, 0, oob_buf[count]);
> +		ECC_SET_PROTECTED_OOB_BYTE(info, 1, oob_buf[count + 1]);

This is a NOOP: info is a local variable, so, anything you put in there
is just lost. It could work if nfc_info_ptr() was returning a pointer
and the local info var was also a pointer, but that's not the case
here.

BTW, maybe you don't need the ECC_SET/GET_PROTECTED_OOB_BYTE() macros.
Just do the conversion in the meson_nfc_get/set_protected_oob_bytes()
functions.

> +	}
> +}
> +
> +static void meson_nfc_get_user_byte(struct nand_chip *nand, u8 *oob_buf)

	       ^ meson_nfc_get_protected_oob_bytes()

> +{
> +	__le64 info;
> +	int i, count;
> +
> +	for (i = 0, count = 0; i < nand->ecc.steps; i++, count += 2) {
> +		info = nfc_info_ptr(nand, i);
> +		oob_buf[count] = ECC_GET_PROTECTED_OOB_BYTE(info, 0);
> +		oob_buf[count + 1] = ECC_GET_PROTECTED_OOB_BYTE(info, 1);
> +	}
> +}
> +
> +static int meson_nfc_ecc_correct(struct nand_chip *nand)
> +{
> +	struct mtd_info *mtd = nand_to_mtd(nand);
> +	__le64 info;
> +	u32 bitflips = 0, i;
> +	int scramble;
> +	u8 zero_cnt;
> +
> +	scramble = (nand->options & NAND_NEED_SCRAMBLING) ? 1 : 0;
> +
> +	for (i = 0; i < nand->ecc.steps; i++) {
> +		info = nfc_info_ptr(nand, i);
> +		if (ECC_ERR_CNT(info) == 0x3f) {
> +			zero_cnt = ECC_ZERO_CNT(info);
> +			if (scramble && zero_cnt < nand->ecc.strength)
> +				return ECC_CHECK_RETURN_FF;
> +			mtd->ecc_stats.failed++;
> +			continue;
> +		}
> +		mtd->ecc_stats.corrected += ECC_ERR_CNT(info);
> +		bitflips = max_t(u32, bitflips, ECC_ERR_CNT(info));
> +	}
> +
> +	return bitflips;
> +}
> +
> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
> +{
> +	struct nand_chip *nand = mtd_to_nand(mtd);
> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> +	u32 cmd;
> +
> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +
> +	meson_nfc_drain_cmd(nfc);

You probably don't want to drain the FIFO every time you read a byte on
the bus, and I guess the INPUT FIFO is at least as big as the CMD
FIFO, right? If that's the case, you should queue as much DRD cmd as
possible and only sync when the user explicitly requests it or when
the INPUT/READ FIFO is full.

> +
> +	meson_nfc_wait_cmd_finish(nfc, 1000);
> +
> +	return readb(nfc->reg_base + NFC_REG_BUF);
> +}
> +
> +static void meson_nfc_read_buf(struct mtd_info *mtd, u8 *buf, int len)
> +{
> +	int i;
> +
> +	for (i = 0; i < len; i++)
> +		buf[i] = meson_nfc_read_byte(mtd);
> +}
> +
> +static void meson_nfc_write_byte(struct mtd_info *mtd, u8 byte)
> +{
> +	struct meson_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
> +	u32 cmd;
> +
> +	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
> +
> +	cmd = nfc->param.chip_select | NFC_CMD_DWR | (byte & 0xff);
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +
> +	meson_nfc_cmd_idle(nfc, nfc->timing.twb);

Nope, tWB is not needed between 2 data-write cycles. It's only needed
when a wait-RB is requested after a write.

> +	meson_nfc_cmd_idle(nfc, 0);

Why do you need that one?

> +
> +	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);

As for the read_byte() implementation, I don't think you should force a
sync here.

> +}
> +
> +static void meson_nfc_write_buf(struct mtd_info *mtd,
> +				const u8 *buf, int len)
> +{
> +	int i;
> +
> +	for (i = 0; i < len; i++)
> +		meson_nfc_write_byte(mtd, buf[i]);
> +}
> +
> +static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
> +						int page, bool in)
> +{
> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> +	const struct nand_sdr_timings *sdr =
> +		nand_get_sdr_timings(&nand->data_interface);
> +	int cs = nfc->param.chip_select;
> +	int i, cmd0, cmd_num;
> +	int ret = 0;
> +
> +	cmd0 = in ? NAND_CMD_READ0 : NAND_CMD_SEQIN;
> +	cmd_num = sizeof(struct nand_rw_cmd) / sizeof(int);
> +	if (!in)
> +		cmd_num--;
> +
> +	nfc->cmdfifo.rw.cmd0 = cs | NFC_CMD_CLE | cmd0;
> +	for (i = 0; i < MAX_CYCLE_COLUMN_ADDRS; i++)
> +		nfc->cmdfifo.rw.col[i] = cs | NFC_CMD_ALE | 0;
> +
> +	for (i = 0; i < MAX_CYCLE_ROW_ADDRS; i++)
> +		nfc->cmdfifo.rw.row[i] = cs | NFC_CMD_ALE | ROW_ADDER(page, i);
> +
> +	nfc->cmdfifo.rw.cmd1 = cs | NFC_CMD_CLE | NAND_CMD_READSTART;

Having a fixed size array for the column and row address cycles does
not sound like a good idea to me (depending on the NAND chip you
connect, the number of cycles for the row and column differ), which
makes me realize the nand_rw_cmd struct is not such a good thing...

> +
> +	for (i = 0; i < cmd_num; i++)
> +		writel(nfc->cmdfifo.cmd[i], nfc->reg_base + NFC_REG_CMD);

... why not write directly to the CMD reg?

> +
> +	if (in)
> +		meson_nfc_queue_rb(nfc, sdr->tR_max);
> +	else
> +		meson_nfc_cmd_idle(nfc, nfc->timing.tadl);
> +
> +	return ret;
> +}
> +
> +static int meson_nfc_write_page_sub(struct nand_chip *nand, const u8 *buf,
> +				    int page, int raw)
> +{
> +	struct mtd_info *mtd = nand_to_mtd(nand);
> +	const struct nand_sdr_timings *sdr =
> +		nand_get_sdr_timings(&nand->data_interface);
> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> +	dma_addr_t daddr, iaddr;
> +	u32 cmd;
> +	int ret;
> +
> +	daddr = dma_map_single(nfc->dev, (void *)meson_chip->data_buf,
> +			       mtd->writesize + mtd->oobsize,
> +			       DMA_TO_DEVICE);
> +	ret = dma_mapping_error(nfc->dev, daddr);
> +	if (ret) {
> +		dev_err(nfc->dev, "dma mapping error\n");
> +		goto err;
> +	}
> +
> +	iaddr = dma_map_single(nfc->dev, (void *)meson_chip->info_buf,
> +			       nand->ecc.steps * PER_INFO_BYTE,
> +			       DMA_TO_DEVICE);
> +	ret = dma_mapping_error(nfc->dev, iaddr);
> +	if (ret) {
> +		dev_err(nfc->dev, "dma mapping error\n");
> +		goto err_map_daddr;
> +	}
> +
> +	ret = meson_nfc_rw_cmd_prepare_and_execute(nand, page, DIRWRITE);
> +	if (ret)
> +		goto err_map_iaddr;
> +
> +	cmd = GENCMDDADDRL(NFC_CMD_ADL, daddr);
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +
> +	cmd = GENCMDDADDRH(NFC_CMD_ADH, daddr);
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +
> +	cmd = GENCMDIADDRL(NFC_CMD_AIL, iaddr);
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +
> +	cmd = GENCMDIADDRH(NFC_CMD_AIH, iaddr);
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +
> +	meson_nfc_cmd_seed(nfc, page);
> +
> +	meson_nfc_cmd_access(nand, raw, DIRWRITE);
> +
> +	ret = meson_nfc_wait_dma_finish(nfc);
> +	cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_PAGEPROG;
> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +	meson_nfc_queue_rb(nfc, sdr->tPROG_max);

Don't you have a way to queue the PAGEPROG and WAIT_RB instructions
before the DMA finishes?

> +
> +err_map_iaddr:
> +	dma_unmap_single(nfc->dev, iaddr,
> +			 nand->ecc.steps * PER_INFO_BYTE, DMA_TO_DEVICE);
> +err_map_daddr:
> +	dma_unmap_single(nfc->dev, daddr,
> +			 mtd->writesize + mtd->oobsize, DMA_TO_DEVICE);
> +err:
> +	return ret;
> +}
> +

[...]

> +static int meson_nfc_exec_op(struct nand_chip *nand,
> +			     const struct nand_operation *op, bool check_only)
> +{
> +	struct mtd_info *mtd = nand_to_mtd(nand);
> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> +	const struct nand_op_instr *instr = NULL;
> +	int ret = 0, cmd;
> +	unsigned int op_id;
> +	int i, delay_idle;
> +
> +	for (op_id = 0; op_id < op->ninstrs; op_id++) {
> +		instr = &op->instrs[op_id];
> +		delay_idle = DIV_ROUND_UP(PSEC_TO_NSEC(instr->delay_ns),
> +					  meson_chip->level1_divider
> +					  * NFC_CLK_CYCLE);

On multi-line operations, put the operator on the previous line:

					  meson_chip->level1_divider *
					  NFC_CLK_CYCLE);


> +		switch (instr->type) {
> +		case NAND_OP_CMD_INSTR:
> +			cmd = nfc->param.chip_select | NFC_CMD_CLE;
> +			cmd |= instr->ctx.cmd.opcode & 0xff;
> +			writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +			meson_nfc_cmd_idle(nfc, delay_idle);
> +			break;
> +
> +		case NAND_OP_ADDR_INSTR:
> +			for (i = 0; i < instr->ctx.addr.naddrs; i++) {
> +				cmd = nfc->param.chip_select | NFC_CMD_ALE;
> +				cmd |= instr->ctx.addr.addrs[i] & 0xff;
> +				writel(cmd, nfc->reg_base + NFC_REG_CMD);
> +			}
> +			meson_nfc_cmd_idle(nfc, delay_idle);
> +			break;
> +
> +		case NAND_OP_DATA_IN_INSTR:
> +			meson_nfc_read_buf(mtd, instr->ctx.data.buf.in,
> +					   instr->ctx.data.len);
> +			break;
> +
> +		case NAND_OP_DATA_OUT_INSTR:
> +			meson_nfc_write_buf(mtd, instr->ctx.data.buf.out,
> +					    instr->ctx.data.len);
> +			break;
> +
> +		case NAND_OP_WAITRDY_INSTR:
> +			meson_nfc_queue_rb(nfc, instr->ctx.waitrdy.timeout_ms);
> +			meson_nfc_cmd_idle(nfc, delay_idle);
> +			break;
> +		}

I think you could move the meson_nfc_cmd_idle() call here, and only add
it when instr->delay_ns != 0:

		if (instr->delay_ns)
			meson_nfc_cmd_idle(nfc, delay_idle);

> +	}

Don't you need a call to meson_nfc_wait_cmd_finish() here?

> +	return ret;
> +}
Liang Yang Nov. 6, 2018, 9:08 a.m. UTC | #2
On 2018/11/5 23:53, Boris Brezillon wrote:
> On Fri, 2 Nov 2018 00:42:21 +0800
> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
> 
>> +#define NFC_REG_CMD		0x00
>> +#define NFC_CMD_DRD		(0x8 << 14)
>> +#define NFC_CMD_IDLE		(0xc << 14)
>> +#define NFC_CMD_DWR		(0x4 << 14)
>> +#define NFC_CMD_CLE		(0x5 << 14)
>> +#define NFC_CMD_ALE		(0x6 << 14)
>> +#define NFC_CMD_ADL		((0 << 16) | (3 << 20))
>> +#define NFC_CMD_ADH		((1 << 16) | (3 << 20))
>> +#define NFC_CMD_AIL		((2 << 16) | (3 << 20))
>> +#define NFC_CMD_AIH		((3 << 16) | (3 << 20))
>> +#define NFC_CMD_SEED		((8 << 16) | (3 << 20))
>> +#define NFC_CMD_M2N		((0 << 17) | (2 << 20))
>> +#define NFC_CMD_N2M		((1 << 17) | (2 << 20))
>> +#define NFC_CMD_RB		BIT(20)
>> +#define NFC_CMD_IO6		((0xb << 10) | (1 << 18))
>> +#define NFC_CMD_SCRAMBLER_ENABLE	BIT(19)
>> +#define NFC_CMD_RB_INT		BIT(14)
>> +
>> +#define NFC_CMD_GET_SIZE(x)	(((x) >> 22) & GENMASK(4, 0))
>> +#define NFC_CMD_RB_TIMEOUT	0x18
> 
> Where does this value come from? Is it the typical timeout value,
> and if it is, what's the value in milli/microseconds?
> 
it is about 5.2ms when one nand cycle is 20ns and calculate it with the 
max erase time of toshiba slc flash.i think it should be taken from the 
sdr_timings.
>> +
>> +#define NFC_REG_CFG		0x04
>> +#define NFC_REG_DADR		0x08
>> +#define NFC_REG_IADR		0x0c
>> +#define NFC_REG_BUF		0x10
>> +#define NFC_REG_INFO		0x14
>> +#define NFC_REG_DC		0x18
>> +#define NFC_REG_ADR		0x1c
>> +#define NFC_REG_DL		0x20
>> +#define NFC_REG_DH		0x24
>> +#define NFC_REG_CADR		0x28
>> +#define NFC_REG_SADR		0x2c
>> +#define NFC_REG_PINS		0x30
>> +#define NFC_REG_VER		0x38
>> +
>> +#define NFC_RB_IRQ_EN		BIT(21)
>> +#define NFC_INT_MASK		(3 << 20)
>> +
>> +#define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages)	\
>> +	(								\
>> +		(cmd_dir)			|			\
>> +		((ran) << 19)			|			\
>> +		((bch) << 14)			|			\
>> +		((short_mode) << 13)		|			\
>> +		(((page_size) & 0x7f) << 6)	|			\
>> +		((pages) & 0x3f)					\
>> +	)
>> +
>> +#define GENCMDDADDRL(adl, addr)		((adl) | ((addr) & 0xffff))
>> +#define GENCMDDADDRH(adh, addr)		((adh) | (((addr) >> 16) & 0xffff))
>> +#define GENCMDIADDRL(ail, addr)		((ail) | ((addr) & 0xffff))
>> +#define GENCMDIADDRH(aih, addr)		((aih) | (((addr) >> 16) & 0xffff))
>> +
>> +#define RB_STA(x)		(1 << (26 + (x)))
>> +#define DMA_DIR(dir)		((dir) ? NFC_CMD_N2M : NFC_CMD_M2N)
>> +
>> +#define ECC_CHECK_RETURN_FF	(-1)
>> +
>> +#define NAND_CE0		(0xe << 10)
>> +#define NAND_CE1		(0xd << 10)
>> +
>> +#define DMA_BUSY_TIMEOUT	0x100000
>> +#define CMD_FIFO_EMPTY_TIMEOUT	1000
>> +
>> +#define MAX_CE_NUM		2
>> +
>> +/* eMMC clock register, misc control */
>> +#define SD_EMMC_CLOCK		0x00
>> +#define CLK_ALWAYS_ON		BIT(28)
>> +#define CLK_SELECT_NAND		BIT(31)
>> +#define CLK_DIV_MASK		GENMASK(5, 0)
>> +
>> +#define NFC_CLK_CYCLE		6
>> +
>> +/* nand flash controller delay 3 ns */
>> +#define NFC_DEFAULT_DELAY	3000
>> +
>> +#define MAX_ECC_INDEX		10
>> +
>> +#define MUX_CLK_NUM_PARENTS	2
>> +
>> +#define ROW_ADDER(page, index)	(((page) >> (8 * (index))) & 0xff)
>> +#define MAX_CYCLE_ROW_ADDRS	3
>> +#define MAX_CYCLE_COLUMN_ADDRS	2
>> +#define DIRREAD			1
>> +#define DIRWRITE		0
>> +
>> +#define ECC_PARITY_BCH8_512B	14
>> +
>> +#define PER_INFO_BYTE		8
>> +#define ECC_GET_PROTECTED_OOB_BYTE(x, y)			\
>> +		((x) >> (8 * (y)) & GENMASK(7, 0))
> 
> 		(le64_to_cpu(x) >> (8 * (y)) & GENMASK(7, 0))
> 
>> +
>> +#define ECC_SET_PROTECTED_OOB_BYTE(x, y, z)			\
>> +	{							\
>> +		(x) &= (~((__le64)(0xff) << (8 * (y))));	\
> 
> It's probably better to memset(0) the info_buf so that you can drop
> this masking step.
> 
ok.

>> +		(x) |= ((__le64)(z) << (8 * (y)));		\
> 
> 		    |= cpu_to_le64((u64)z << (8 * (y)));
> 
>> +	}
>> +
>> +#define ECC_COMPLETE            BIT(31)
>> +#define ECC_ERR_CNT(x)		(((x) >> 24) & GENMASK(5, 0))
>> +#define ECC_ZERO_CNT(x)		(((x) >> 16) & GENMASK(5, 0))
>> +
>> +struct meson_nfc_nand_chip {
>> +	struct list_head node;
>> +	struct nand_chip nand;
>> +	int clk_rate;
>> +	int level1_divider;
>> +	int bus_timing;
>> +	int twb;
>> +	int tadl;
>> +
>> +	int bch_mode;
>> +	u8 *data_buf;
>> +	__le64 *info_buf;
>> +	int nsels;
>> +	u8 sels[0];
>> +};
> 
> Please use unsigned integers where having a negative value is not
> possible. I'm pretty sure this is the case of all int fields in this
> struct. The same applies to the other structs.
> 
ok.

> [...]
> 
>> +
>> +static int meson_nfc_wait_cmd_finish(struct meson_nfc *nfc,
>> +				     unsigned int timeout_ms)
>> +{
>> +	u32 cmd_size = 0;
>> +	int ret;
>> +
>> +	/* wait cmd fifo is empty */
>> +	ret = readl_poll_timeout(nfc->reg_base + NFC_REG_CMD, cmd_size,
>> +				 !NFC_CMD_GET_SIZE(cmd_size),
>> +				 10, timeout_ms * 1000);
> 
> I guess you could use the relaxed version here.
> 
ok.

>> +	if (ret)
>> +		dev_err(nfc->dev, "wait for empty cmd FIFO time out\n");
>> +
>> +	return ret;
>> +}
>> +
>> +static int meson_nfc_wait_dma_finish(struct meson_nfc *nfc)
>> +{
>> +	meson_nfc_drain_cmd(nfc);
>> +
>> +	return meson_nfc_wait_cmd_finish(nfc, DMA_BUSY_TIMEOUT);
>> +}
>> +
>> +static inline __le64 nfc_info_ptr(struct nand_chip *nand, int index)
> 
> Drop inline specifiers for things that are defined in .c files. The
> compiler should be smart enough to determine when inlining is
> appropriate.
> 
ok.

>> +{
>> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
>> +
>> +	return le64_to_cpu(meson_chip->info_buf[index]);
> 
> According to the function prototype, you should return
> meson_chip->info_buf[index] directly, but I'm not even sure why you
> need this helper. Just access meson_chip->info_buf[] directly.
> 
ok. i will drop this helper.

>> +}
>> +
> 
> [...]
> 
>> +
>> +static int meson_nfc_queue_rb(struct meson_nfc *nfc, int timeout_ms)
>> +{
>> +	u32 cmd, cfg;
>> +	int ret = 0;
>> +
>> +	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
>> +	meson_nfc_drain_cmd(nfc);
>> +	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);
>> +
>> +	cfg = readl(nfc->reg_base + NFC_REG_CFG);
>> +	cfg |= (1 << 21);
>> +	writel(cfg, nfc->reg_base + NFC_REG_CFG);
>> +
>> +	init_completion(&nfc->completion);
>> +
>> +	cmd = NFC_CMD_RB | NFC_CMD_RB_INT
>> +		| nfc->param.chip_select | NFC_CMD_RB_TIMEOUT;
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +
>> +	ret = wait_for_completion_timeout(&nfc->completion,
>> +					  msecs_to_jiffies(timeout_ms));
>> +	if (ret == 0) {
>> +		dev_err(nfc->dev, "wait nand irq timeout\n");
>> +		ret = -1;
>> +	}
>> +	return ret;
>> +}
>> +
>> +static void meson_nfc_set_user_byte(struct nand_chip *nand, u8 *oob_buf)
> 
> 	       ^ meson_nfc_set_protected_oob_bytes()
> 
>> +{
>> +	__le64 info;
>> +	int i, count;
>> +
>> +	for (i = 0, count = 0; i < nand->ecc.steps; i++, count += 2) {
>> +		info = nfc_info_ptr(nand, i);
>> +		ECC_SET_PROTECTED_OOB_BYTE(info, 0, oob_buf[count]);
>> +		ECC_SET_PROTECTED_OOB_BYTE(info, 1, oob_buf[count + 1]);
> 
> This is a NOOP: info is a local variable, so, anything you put in there
> is just lost. It could work if nfc_info_ptr() was returning a pointer
> and the local info var was also a pointer, but that's not the case
> here.
> 
wow! i made a mistake. previously i used __le64 *info here, but i don't 
know when i changed it carelessly.
thank you for pointing out it, i will fix it.

> BTW, maybe you don't need the ECC_SET/GET_PROTECTED_OOB_BYTE() macros.
> Just do the conversion in the meson_nfc_get/set_protected_oob_bytes()
> functions.
> 
ok.


>> +	}
>> +}
>> +
>> +static void meson_nfc_get_user_byte(struct nand_chip *nand, u8 *oob_buf)
> 
> 	       ^ meson_nfc_get_protected_oob_bytes()
> 
>> +{
>> +	__le64 info;
>> +	int i, count;
>> +
>> +	for (i = 0, count = 0; i < nand->ecc.steps; i++, count += 2) {
>> +		info = nfc_info_ptr(nand, i);
>> +		oob_buf[count] = ECC_GET_PROTECTED_OOB_BYTE(info, 0);
>> +		oob_buf[count + 1] = ECC_GET_PROTECTED_OOB_BYTE(info, 1);
>> +	}
>> +}
>> +
>> +static int meson_nfc_ecc_correct(struct nand_chip *nand)
>> +{
>> +	struct mtd_info *mtd = nand_to_mtd(nand);
>> +	__le64 info;
>> +	u32 bitflips = 0, i;
>> +	int scramble;
>> +	u8 zero_cnt;
>> +
>> +	scramble = (nand->options & NAND_NEED_SCRAMBLING) ? 1 : 0;
>> +
>> +	for (i = 0; i < nand->ecc.steps; i++) {
>> +		info = nfc_info_ptr(nand, i);
>> +		if (ECC_ERR_CNT(info) == 0x3f) {
>> +			zero_cnt = ECC_ZERO_CNT(info);
>> +			if (scramble && zero_cnt < nand->ecc.strength)
>> +				return ECC_CHECK_RETURN_FF;
>> +			mtd->ecc_stats.failed++;
>> +			continue;
>> +		}
>> +		mtd->ecc_stats.corrected += ECC_ERR_CNT(info);
>> +		bitflips = max_t(u32, bitflips, ECC_ERR_CNT(info));
>> +	}
>> +
>> +	return bitflips;
>> +}
>> +
>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
>> +{
>> +	struct nand_chip *nand = mtd_to_nand(mtd);
>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>> +	u32 cmd;
>> +
>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +
>> +	meson_nfc_drain_cmd(nfc);
> 
> You probably don't want to drain the FIFO every time you read a byte on
> the bus, and I guess the INPUT FIFO is at least as big as the CMD
> FIFO, right? If that's the case, you should queue as much DRD cmd as
> possible and only sync when the user explicitly requests it or when
> the INPUT/READ FIFO is full.
> 
Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one 
nand cycle to read one byte and covers the 1st byte every time reading. 
i think nfc controller is faster than nand cycle, but really it is not 
high efficiency when reading so many bytes once.
Or use dma command here like read_page and read_page_raw.

>> +
>> +	meson_nfc_wait_cmd_finish(nfc, 1000);
>> +
>> +	return readb(nfc->reg_base + NFC_REG_BUF);
>> +}
>> +
>> +static void meson_nfc_read_buf(struct mtd_info *mtd, u8 *buf, int len)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < len; i++)
>> +		buf[i] = meson_nfc_read_byte(mtd);
>> +}
>> +
>> +static void meson_nfc_write_byte(struct mtd_info *mtd, u8 byte)
>> +{
>> +	struct meson_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
>> +	u32 cmd;
>> +
>> +	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
>> +
>> +	cmd = nfc->param.chip_select | NFC_CMD_DWR | (byte & 0xff);
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +
>> +	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
> 
> Nope, tWB is not needed between 2 data-write cycles. It's only needed
> when a wait-RB is requested after a write.
> 
ok, i will remove it.

>> +	meson_nfc_cmd_idle(nfc, 0);
> 
> Why do you need that one?
> 
em, it doesn't need here.

>> +
>> +	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);
> 
> As for the read_byte() implementation, I don't think you should force a
> sync here.
> 
ok, it can send 30 bytes (command fifo size subtract 2 idle command ) 
once with a sync. Or use dma command.

>> +}
>> +
>> +static void meson_nfc_write_buf(struct mtd_info *mtd,
>> +				const u8 *buf, int len)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < len; i++)
>> +		meson_nfc_write_byte(mtd, buf[i]);
>> +}
>> +
>> +static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
>> +						int page, bool in)
>> +{
>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>> +	const struct nand_sdr_timings *sdr =
>> +		nand_get_sdr_timings(&nand->data_interface);
>> +	int cs = nfc->param.chip_select;
>> +	int i, cmd0, cmd_num;
>> +	int ret = 0;
>> +
>> +	cmd0 = in ? NAND_CMD_READ0 : NAND_CMD_SEQIN;
>> +	cmd_num = sizeof(struct nand_rw_cmd) / sizeof(int);
>> +	if (!in)
>> +		cmd_num--;
>> +
>> +	nfc->cmdfifo.rw.cmd0 = cs | NFC_CMD_CLE | cmd0;
>> +	for (i = 0; i < MAX_CYCLE_COLUMN_ADDRS; i++)
>> +		nfc->cmdfifo.rw.col[i] = cs | NFC_CMD_ALE | 0;
>> +
>> +	for (i = 0; i < MAX_CYCLE_ROW_ADDRS; i++)
>> +		nfc->cmdfifo.rw.row[i] = cs | NFC_CMD_ALE | ROW_ADDER(page, i);
>> +
>> +	nfc->cmdfifo.rw.cmd1 = cs | NFC_CMD_CLE | NAND_CMD_READSTART;
> 
> Having a fixed size array for the column and row address cycles does
> not sound like a good idea to me (depending on the NAND chip you
> connect, the number of cycles for the row and column differ), which
> makes me realize the nand_rw_cmd struct is not such a good thing...
> 
em , i will fix it by adding the size of the column and row address.
is that ok?

>> +
>> +	for (i = 0; i < cmd_num; i++)
>> +		writel(nfc->cmdfifo.cmd[i], nfc->reg_base + NFC_REG_CMD);
> 
> ... why not write directly to the CMD reg?
> 

it seems that too many writel(xxx, nfc->reg_base + NFC_REG_CMD) in one 
function; so I want to cache all the commands and write it in a loop.

>> +
>> +	if (in)
>> +		meson_nfc_queue_rb(nfc, sdr->tR_max);
>> +	else
>> +		meson_nfc_cmd_idle(nfc, nfc->timing.tadl);
>> +
>> +	return ret;
>> +}
>> +
>> +static int meson_nfc_write_page_sub(struct nand_chip *nand, const u8 *buf,
>> +				    int page, int raw)
>> +{
>> +	struct mtd_info *mtd = nand_to_mtd(nand);
>> +	const struct nand_sdr_timings *sdr =
>> +		nand_get_sdr_timings(&nand->data_interface);
>> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>> +	dma_addr_t daddr, iaddr;
>> +	u32 cmd;
>> +	int ret;
>> +
>> +	daddr = dma_map_single(nfc->dev, (void *)meson_chip->data_buf,
>> +			       mtd->writesize + mtd->oobsize,
>> +			       DMA_TO_DEVICE);
>> +	ret = dma_mapping_error(nfc->dev, daddr);
>> +	if (ret) {
>> +		dev_err(nfc->dev, "dma mapping error\n");
>> +		goto err;
>> +	}
>> +
>> +	iaddr = dma_map_single(nfc->dev, (void *)meson_chip->info_buf,
>> +			       nand->ecc.steps * PER_INFO_BYTE,
>> +			       DMA_TO_DEVICE);
>> +	ret = dma_mapping_error(nfc->dev, iaddr);
>> +	if (ret) {
>> +		dev_err(nfc->dev, "dma mapping error\n");
>> +		goto err_map_daddr;
>> +	}
>> +
>> +	ret = meson_nfc_rw_cmd_prepare_and_execute(nand, page, DIRWRITE);
>> +	if (ret)
>> +		goto err_map_iaddr;
>> +
>> +	cmd = GENCMDDADDRL(NFC_CMD_ADL, daddr);
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +
>> +	cmd = GENCMDDADDRH(NFC_CMD_ADH, daddr);
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +
>> +	cmd = GENCMDIADDRL(NFC_CMD_AIL, iaddr);
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +
>> +	cmd = GENCMDIADDRH(NFC_CMD_AIH, iaddr);
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +
>> +	meson_nfc_cmd_seed(nfc, page);
>> +
>> +	meson_nfc_cmd_access(nand, raw, DIRWRITE);
>> +
>> +	ret = meson_nfc_wait_dma_finish(nfc);
>> +	cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_PAGEPROG;
>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +	meson_nfc_queue_rb(nfc, sdr->tPROG_max);
> 
> Don't you have a way to queue the PAGEPROG and WAIT_RB instructions
> before the DMA finishes?
> 

it runs:
1) dma transfer ddr data to nand page cache.
2) wait dma finish
3) send the PAGEPROG command
4) wait rb finish

meson_nfc_wait_dma_finish(nfc) waits command fifo empty. Maybe it is 
difficult or uncessary to queue the PAGEPROG command and WAIT_RB between 
1) and 2).

is that right?


>> +
>> +err_map_iaddr:
>> +	dma_unmap_single(nfc->dev, iaddr,
>> +			 nand->ecc.steps * PER_INFO_BYTE, DMA_TO_DEVICE);
>> +err_map_daddr:
>> +	dma_unmap_single(nfc->dev, daddr,
>> +			 mtd->writesize + mtd->oobsize, DMA_TO_DEVICE);
>> +err:
>> +	return ret;
>> +}
>> +
> 
> [...]
> 
>> +static int meson_nfc_exec_op(struct nand_chip *nand,
>> +			     const struct nand_operation *op, bool check_only)
>> +{
>> +	struct mtd_info *mtd = nand_to_mtd(nand);
>> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>> +	const struct nand_op_instr *instr = NULL;
>> +	int ret = 0, cmd;
>> +	unsigned int op_id;
>> +	int i, delay_idle;
>> +
>> +	for (op_id = 0; op_id < op->ninstrs; op_id++) {
>> +		instr = &op->instrs[op_id];
>> +		delay_idle = DIV_ROUND_UP(PSEC_TO_NSEC(instr->delay_ns),
>> +					  meson_chip->level1_divider
>> +					  * NFC_CLK_CYCLE);
> 
> On multi-line operations, put the operator on the previous line:
> 
ok, i will fix it.

> 					  meson_chip->level1_divider *
> 					  NFC_CLK_CYCLE);
> 
> 
>> +		switch (instr->type) {
>> +		case NAND_OP_CMD_INSTR:
>> +			cmd = nfc->param.chip_select | NFC_CMD_CLE;
>> +			cmd |= instr->ctx.cmd.opcode & 0xff;
>> +			writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +			meson_nfc_cmd_idle(nfc, delay_idle);
>> +			break;
>> +
>> +		case NAND_OP_ADDR_INSTR:
>> +			for (i = 0; i < instr->ctx.addr.naddrs; i++) {
>> +				cmd = nfc->param.chip_select | NFC_CMD_ALE;
>> +				cmd |= instr->ctx.addr.addrs[i] & 0xff;
>> +				writel(cmd, nfc->reg_base + NFC_REG_CMD);
>> +			}
>> +			meson_nfc_cmd_idle(nfc, delay_idle);
>> +			break;
>> +
>> +		case NAND_OP_DATA_IN_INSTR:
>> +			meson_nfc_read_buf(mtd, instr->ctx.data.buf.in,
>> +					   instr->ctx.data.len);
>> +			break;
>> +
>> +		case NAND_OP_DATA_OUT_INSTR:
>> +			meson_nfc_write_buf(mtd, instr->ctx.data.buf.out,
>> +					    instr->ctx.data.len);
>> +			break;
>> +
>> +		case NAND_OP_WAITRDY_INSTR:
>> +			meson_nfc_queue_rb(nfc, instr->ctx.waitrdy.timeout_ms);
>> +			meson_nfc_cmd_idle(nfc, delay_idle);
>> +			break;
>> +		}
> 
> I think you could move the meson_nfc_cmd_idle() call here, and only add
> it when instr->delay_ns != 0:
> > 		if (instr->delay_ns)
> 			meson_nfc_cmd_idle(nfc, delay_idle);
> 
ok, thank you.

>> +	}
> 
> Don't you need a call to meson_nfc_wait_cmd_finish() here?
> 
it will be called in queue_rb, read_buf and write_buf. but i have no 
idea whether it still needs to be called corresponding to 
CMD_INSTR/ADDR_INSTR.To be strict, it should be called.

>> +	return ret;
>> +}
> 
> .
>
Boris Brezillon Nov. 6, 2018, 9:28 a.m. UTC | #3
On Tue, 6 Nov 2018 17:08:00 +0800
Liang Yang <liang.yang@amlogic.com> wrote:

> On 2018/11/5 23:53, Boris Brezillon wrote:
> > On Fri, 2 Nov 2018 00:42:21 +0800
> > Jianxin Pan <jianxin.pan@amlogic.com> wrote:
> >   
> >> +#define NFC_REG_CMD		0x00
> >> +#define NFC_CMD_DRD		(0x8 << 14)
> >> +#define NFC_CMD_IDLE		(0xc << 14)
> >> +#define NFC_CMD_DWR		(0x4 << 14)
> >> +#define NFC_CMD_CLE		(0x5 << 14)
> >> +#define NFC_CMD_ALE		(0x6 << 14)
> >> +#define NFC_CMD_ADL		((0 << 16) | (3 << 20))
> >> +#define NFC_CMD_ADH		((1 << 16) | (3 << 20))
> >> +#define NFC_CMD_AIL		((2 << 16) | (3 << 20))
> >> +#define NFC_CMD_AIH		((3 << 16) | (3 << 20))
> >> +#define NFC_CMD_SEED		((8 << 16) | (3 << 20))
> >> +#define NFC_CMD_M2N		((0 << 17) | (2 << 20))
> >> +#define NFC_CMD_N2M		((1 << 17) | (2 << 20))
> >> +#define NFC_CMD_RB		BIT(20)
> >> +#define NFC_CMD_IO6		((0xb << 10) | (1 << 18))
> >> +#define NFC_CMD_SCRAMBLER_ENABLE	BIT(19)
> >> +#define NFC_CMD_RB_INT		BIT(14)
> >> +
> >> +#define NFC_CMD_GET_SIZE(x)	(((x) >> 22) & GENMASK(4, 0))
> >> +#define NFC_CMD_RB_TIMEOUT	0x18  
> > 
> > Where does this value come from? Is it the typical timeout value,
> > and if it is, what's the value in milli/microseconds?
> >   
> it is about 5.2ms when one nand cycle is 20ns and calculate it with the 
> max erase time of toshiba slc flash.i think it should be taken from the 
> sdr_timings.

Yes, it should. Or just pick the maximum value, since it's just a
timeout anyway and shouldn't expire if everything works as expected.

> >> +
> >> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
> >> +{
> >> +	struct nand_chip *nand = mtd_to_nand(mtd);
> >> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> >> +	u32 cmd;
> >> +
> >> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
> >> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >> +
> >> +	meson_nfc_drain_cmd(nfc);  
> > 
> > You probably don't want to drain the FIFO every time you read a byte on
> > the bus, and I guess the INPUT FIFO is at least as big as the CMD
> > FIFO, right? If that's the case, you should queue as much DRD cmd as
> > possible and only sync when the user explicitly requests it or when
> > the INPUT/READ FIFO is full.
> >   
> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one 
> nand cycle to read one byte and covers the 1st byte every time reading. 
> i think nfc controller is faster than nand cycle, but really it is not 
> high efficiency when reading so many bytes once.
> Or use dma command here like read_page and read_page_raw.

Yep, that's also an alternative, though you'll have to make sure the
buffer passed through the nand_op_inst is DMA-safe, and use a bounce
buffer when that's not the case.

> >> +
> >> +	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);  
> > 
> > As for the read_byte() implementation, I don't think you should force a
> > sync here.
> >   
> ok, it can send 30 bytes (command fifo size subtract 2 idle command ) 
> once with a sync.

Okay, still better than syncing after each transmitted byte.

> Or use dma command.

I guess that's the best option.

> 
> >> +}
> >> +
> >> +static void meson_nfc_write_buf(struct mtd_info *mtd,
> >> +				const u8 *buf, int len)
> >> +{
> >> +	int i;
> >> +
> >> +	for (i = 0; i < len; i++)
> >> +		meson_nfc_write_byte(mtd, buf[i]);
> >> +}
> >> +
> >> +static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
> >> +						int page, bool in)
> >> +{
> >> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> >> +	const struct nand_sdr_timings *sdr =
> >> +		nand_get_sdr_timings(&nand->data_interface);
> >> +	int cs = nfc->param.chip_select;
> >> +	int i, cmd0, cmd_num;
> >> +	int ret = 0;
> >> +
> >> +	cmd0 = in ? NAND_CMD_READ0 : NAND_CMD_SEQIN;
> >> +	cmd_num = sizeof(struct nand_rw_cmd) / sizeof(int);
> >> +	if (!in)
> >> +		cmd_num--;
> >> +
> >> +	nfc->cmdfifo.rw.cmd0 = cs | NFC_CMD_CLE | cmd0;
> >> +	for (i = 0; i < MAX_CYCLE_COLUMN_ADDRS; i++)
> >> +		nfc->cmdfifo.rw.col[i] = cs | NFC_CMD_ALE | 0;
> >> +
> >> +	for (i = 0; i < MAX_CYCLE_ROW_ADDRS; i++)
> >> +		nfc->cmdfifo.rw.row[i] = cs | NFC_CMD_ALE | ROW_ADDER(page, i);
> >> +
> >> +	nfc->cmdfifo.rw.cmd1 = cs | NFC_CMD_CLE | NAND_CMD_READSTART;  
> > 
> > Having a fixed size array for the column and row address cycles does
> > not sound like a good idea to me (depending on the NAND chip you
> > connect, the number of cycles for the row and column differ), which
> > makes me realize the nand_rw_cmd struct is not such a good thing...
> >   
> em , i will fix it by adding the size of the column and row address.
> is that ok?
> 
> >> +
> >> +	for (i = 0; i < cmd_num; i++)
> >> +		writel(nfc->cmdfifo.cmd[i], nfc->reg_base + NFC_REG_CMD);  
> > 
> > ... why not write directly to the CMD reg?
> >   
> 
> it seems that too many writel(xxx, nfc->reg_base + NFC_REG_CMD) in one 
> function; so I want to cache all the commands and write it in a loop.

Not sure why it makes a difference since you'll end up writing to
NFC_REG_CMD anyway.

BTW, you can probably use the writel_relaxed() instead of writel() when
writing to the CMD FIFO.

> 
> >> +
> >> +	if (in)
> >> +		meson_nfc_queue_rb(nfc, sdr->tR_max);
> >> +	else
> >> +		meson_nfc_cmd_idle(nfc, nfc->timing.tadl);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static int meson_nfc_write_page_sub(struct nand_chip *nand, const u8 *buf,
> >> +				    int page, int raw)
> >> +{
> >> +	struct mtd_info *mtd = nand_to_mtd(nand);
> >> +	const struct nand_sdr_timings *sdr =
> >> +		nand_get_sdr_timings(&nand->data_interface);
> >> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
> >> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> >> +	dma_addr_t daddr, iaddr;
> >> +	u32 cmd;
> >> +	int ret;
> >> +
> >> +	daddr = dma_map_single(nfc->dev, (void *)meson_chip->data_buf,
> >> +			       mtd->writesize + mtd->oobsize,
> >> +			       DMA_TO_DEVICE);
> >> +	ret = dma_mapping_error(nfc->dev, daddr);
> >> +	if (ret) {
> >> +		dev_err(nfc->dev, "dma mapping error\n");
> >> +		goto err;
> >> +	}
> >> +
> >> +	iaddr = dma_map_single(nfc->dev, (void *)meson_chip->info_buf,
> >> +			       nand->ecc.steps * PER_INFO_BYTE,
> >> +			       DMA_TO_DEVICE);
> >> +	ret = dma_mapping_error(nfc->dev, iaddr);
> >> +	if (ret) {
> >> +		dev_err(nfc->dev, "dma mapping error\n");
> >> +		goto err_map_daddr;
> >> +	}
> >> +
> >> +	ret = meson_nfc_rw_cmd_prepare_and_execute(nand, page, DIRWRITE);
> >> +	if (ret)
> >> +		goto err_map_iaddr;
> >> +
> >> +	cmd = GENCMDDADDRL(NFC_CMD_ADL, daddr);
> >> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >> +
> >> +	cmd = GENCMDDADDRH(NFC_CMD_ADH, daddr);
> >> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >> +
> >> +	cmd = GENCMDIADDRL(NFC_CMD_AIL, iaddr);
> >> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >> +
> >> +	cmd = GENCMDIADDRH(NFC_CMD_AIH, iaddr);
> >> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >> +
> >> +	meson_nfc_cmd_seed(nfc, page);
> >> +
> >> +	meson_nfc_cmd_access(nand, raw, DIRWRITE);
> >> +
> >> +	ret = meson_nfc_wait_dma_finish(nfc);
> >> +	cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_PAGEPROG;
> >> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >> +	meson_nfc_queue_rb(nfc, sdr->tPROG_max);  
> > 
> > Don't you have a way to queue the PAGEPROG and WAIT_RB instructions
> > before the DMA finishes?
> >   
> 
> it runs:
> 1) dma transfer ddr data to nand page cache.
> 2) wait dma finish
> 3) send the PAGEPROG command
> 4) wait rb finish
> 
> meson_nfc_wait_dma_finish(nfc) waits command fifo empty. Maybe it is 
> difficult or uncessary to queue the PAGEPROG command and WAIT_RB between 
> 1) and 2).
> 
> is that right?
> 

Isn't the controller engine able to wait on the data transfer to be
complete before sending the next instruction in the CMD FIFO pipe?
I'm pretty sure it's able to do that, which would make
meson_nfc_wait_dma_finish() useless, and all you'd have to do is wait
for the CMD FIFO to be empty (assuming it guarantees the last command
has been executed).

> 
> >> +	}  
> > 
> > Don't you need a call to meson_nfc_wait_cmd_finish() here?
> >   
> it will be called in queue_rb, read_buf and write_buf. but i have no 
> idea whether it still needs to be called corresponding to 
> CMD_INSTR/ADDR_INSTR.To be strict, it should be called.

No, the synchronization is only needed before returning from
->exec_op(). Everything before can be queued without being
executed. Of course, if you run out of entries in the CMD/INPUT
FIFOs you'll have to do some sort of synchronization, but that should
be taken care of in your helpers.
Liang Yang Nov. 6, 2018, 10 a.m. UTC | #4
On 2018/11/6 17:28, Boris Brezillon wrote:
> On Tue, 6 Nov 2018 17:08:00 +0800
> Liang Yang <liang.yang@amlogic.com> wrote:
> 
>> On 2018/11/5 23:53, Boris Brezillon wrote:
>>> On Fri, 2 Nov 2018 00:42:21 +0800
>>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
>>>    
>>>> +
>>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
>>>> +{
>>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>> +	u32 cmd;
>>>> +
>>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>> +
>>>> +	meson_nfc_drain_cmd(nfc);
>>>
>>> You probably don't want to drain the FIFO every time you read a byte on
>>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
>>> FIFO, right? If that's the case, you should queue as much DRD cmd as
>>> possible and only sync when the user explicitly requests it or when
>>> the INPUT/READ FIFO is full.
>>>    
>> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
>> nand cycle to read one byte and covers the 1st byte every time reading.
>> i think nfc controller is faster than nand cycle, but really it is not
>> high efficiency when reading so many bytes once.
>> Or use dma command here like read_page and read_page_raw.
> 
> Yep, that's also an alternative, though you'll have to make sure the
> buffer passed through the nand_op_inst is DMA-safe, and use a bounce
> buffer when that's not the case.
> 
ok, i will try dma here.

>>>> +
>>>> +	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);
>>>
>>> As for the read_byte() implementation, I don't think you should force a
>>> sync here.
>>>    
>> ok, it can send 30 bytes (command fifo size subtract 2 idle command )
>> once with a sync.
> 
> Okay, still better than syncing after each transmitted byte.
> 
>> Or use dma command.
> 
> I guess that's the best option.
> 
ok, i will try dma here.
>>
>>>> +}
>>>> +
>>>> +static void meson_nfc_write_buf(struct mtd_info *mtd,
>>>> +				const u8 *buf, int len)
>>>> +{
>>>> +	int i;
>>>> +
>>>> +	for (i = 0; i < len; i++)
>>>> +		meson_nfc_write_byte(mtd, buf[i]);
>>>> +}
>>>> +
>>>> +static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
>>>> +						int page, bool in)
>>>> +{
>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>> +	const struct nand_sdr_timings *sdr =
>>>> +		nand_get_sdr_timings(&nand->data_interface);
>>>> +	int cs = nfc->param.chip_select;
>>>> +	int i, cmd0, cmd_num;
>>>> +	int ret = 0;
>>>> +
>>>> +	cmd0 = in ? NAND_CMD_READ0 : NAND_CMD_SEQIN;
>>>> +	cmd_num = sizeof(struct nand_rw_cmd) / sizeof(int);
>>>> +	if (!in)
>>>> +		cmd_num--;
>>>> +
>>>> +	nfc->cmdfifo.rw.cmd0 = cs | NFC_CMD_CLE | cmd0;
>>>> +	for (i = 0; i < MAX_CYCLE_COLUMN_ADDRS; i++)
>>>> +		nfc->cmdfifo.rw.col[i] = cs | NFC_CMD_ALE | 0;
>>>> +
>>>> +	for (i = 0; i < MAX_CYCLE_ROW_ADDRS; i++)
>>>> +		nfc->cmdfifo.rw.row[i] = cs | NFC_CMD_ALE | ROW_ADDER(page, i);
>>>> +
>>>> +	nfc->cmdfifo.rw.cmd1 = cs | NFC_CMD_CLE | NAND_CMD_READSTART;
>>>
>>> Having a fixed size array for the column and row address cycles does
>>> not sound like a good idea to me (depending on the NAND chip you
>>> connect, the number of cycles for the row and column differ), which
>>> makes me realize the nand_rw_cmd struct is not such a good thing...
>>>    
>> em , i will fix it by adding the size of the column and row address.
>> is that ok?
>>
>>>> +
>>>> +	for (i = 0; i < cmd_num; i++)
>>>> +		writel(nfc->cmdfifo.cmd[i], nfc->reg_base + NFC_REG_CMD);
>>>
>>> ... why not write directly to the CMD reg?
>>>    
>>
>> it seems that too many writel(xxx, nfc->reg_base + NFC_REG_CMD) in one
>> function; so I want to cache all the commands and write it in a loop.
> 
> Not sure why it makes a difference since you'll end up writing to
> NFC_REG_CMD anyway.
> 
> BTW, you can probably use the writel_relaxed() instead of writel() when
> writing to the CMD FIFO.
> 
ok.
>>
>>>> +
>>>> +	if (in)
>>>> +		meson_nfc_queue_rb(nfc, sdr->tR_max);
>>>> +	else
>>>> +		meson_nfc_cmd_idle(nfc, nfc->timing.tadl);
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static int meson_nfc_write_page_sub(struct nand_chip *nand, const u8 *buf,
>>>> +				    int page, int raw)
>>>> +{
>>>> +	struct mtd_info *mtd = nand_to_mtd(nand);
>>>> +	const struct nand_sdr_timings *sdr =
>>>> +		nand_get_sdr_timings(&nand->data_interface);
>>>> +	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>> +	dma_addr_t daddr, iaddr;
>>>> +	u32 cmd;
>>>> +	int ret;
>>>> +
>>>> +	daddr = dma_map_single(nfc->dev, (void *)meson_chip->data_buf,
>>>> +			       mtd->writesize + mtd->oobsize,
>>>> +			       DMA_TO_DEVICE);
>>>> +	ret = dma_mapping_error(nfc->dev, daddr);
>>>> +	if (ret) {
>>>> +		dev_err(nfc->dev, "dma mapping error\n");
>>>> +		goto err;
>>>> +	}
>>>> +
>>>> +	iaddr = dma_map_single(nfc->dev, (void *)meson_chip->info_buf,
>>>> +			       nand->ecc.steps * PER_INFO_BYTE,
>>>> +			       DMA_TO_DEVICE);
>>>> +	ret = dma_mapping_error(nfc->dev, iaddr);
>>>> +	if (ret) {
>>>> +		dev_err(nfc->dev, "dma mapping error\n");
>>>> +		goto err_map_daddr;
>>>> +	}
>>>> +
>>>> +	ret = meson_nfc_rw_cmd_prepare_and_execute(nand, page, DIRWRITE);
>>>> +	if (ret)
>>>> +		goto err_map_iaddr;
>>>> +
>>>> +	cmd = GENCMDDADDRL(NFC_CMD_ADL, daddr);
>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>> +
>>>> +	cmd = GENCMDDADDRH(NFC_CMD_ADH, daddr);
>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>> +
>>>> +	cmd = GENCMDIADDRL(NFC_CMD_AIL, iaddr);
>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>> +
>>>> +	cmd = GENCMDIADDRH(NFC_CMD_AIH, iaddr);
>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>> +
>>>> +	meson_nfc_cmd_seed(nfc, page);
>>>> +
>>>> +	meson_nfc_cmd_access(nand, raw, DIRWRITE);
>>>> +
>>>> +	ret = meson_nfc_wait_dma_finish(nfc);
>>>> +	cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_PAGEPROG;
>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>> +	meson_nfc_queue_rb(nfc, sdr->tPROG_max);
>>>
>>> Don't you have a way to queue the PAGEPROG and WAIT_RB instructions
>>> before the DMA finishes?
>>>    
>>
>> it runs:
>> 1) dma transfer ddr data to nand page cache.
>> 2) wait dma finish
>> 3) send the PAGEPROG command
>> 4) wait rb finish
>>
>> meson_nfc_wait_dma_finish(nfc) waits command fifo empty. Maybe it is
>> difficult or uncessary to queue the PAGEPROG command and WAIT_RB between
>> 1) and 2).
>>
>> is that right?
>>
> 
> Isn't the controller engine able to wait on the data transfer to be
> complete before sending the next instruction in the CMD FIFO pipe?
> I'm pretty sure it's able to do that, which would make
> meson_nfc_wait_dma_finish() useless, and all you'd have to do is wait
> for the CMD FIFO to be empty (assuming it guarantees the last command
> has been executed).
> Maybe the nfc design is difference. dedicated nfc dma engine is 
concatenated with the command fifo, there is no other status to check 
whether dma is done.

>>
>>>> +	}
>>>
>>> Don't you need a call to meson_nfc_wait_cmd_finish() here?
>>>    
>> it will be called in queue_rb, read_buf and write_buf. but i have no
>> idea whether it still needs to be called corresponding to
>> CMD_INSTR/ADDR_INSTR.To be strict, it should be called.
> 
> No, the synchronization is only needed before returning from
> ->exec_op(). Everything before can be queued without being
> executed. Of course, if you run out of entries in the CMD/INPUT
> FIFOs you'll have to do some sort of synchronization, but that should
> be taken care of in your helpers.
> 
ok, i will add it.
> .
>
Boris Brezillon Nov. 6, 2018, 10:22 a.m. UTC | #5
On Tue, 6 Nov 2018 18:00:37 +0800
Liang Yang <liang.yang@amlogic.com> wrote:

> On 2018/11/6 17:28, Boris Brezillon wrote:
> > On Tue, 6 Nov 2018 17:08:00 +0800
> > Liang Yang <liang.yang@amlogic.com> wrote:
> >   
> >> On 2018/11/5 23:53, Boris Brezillon wrote:  
> >>> On Fri, 2 Nov 2018 00:42:21 +0800
> >>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
> >>>      
> >>>> +
> >>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
> >>>> +{
> >>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
> >>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> >>>> +	u32 cmd;
> >>>> +
> >>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
> >>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >>>> +
> >>>> +	meson_nfc_drain_cmd(nfc);  
> >>>
> >>> You probably don't want to drain the FIFO every time you read a byte on
> >>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
> >>> FIFO, right? If that's the case, you should queue as much DRD cmd as
> >>> possible and only sync when the user explicitly requests it or when
> >>> the INPUT/READ FIFO is full.
> >>>      
> >> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
> >> nand cycle to read one byte and covers the 1st byte every time reading.
> >> i think nfc controller is faster than nand cycle, but really it is not
> >> high efficiency when reading so many bytes once.
> >> Or use dma command here like read_page and read_page_raw.  
> > 
> > Yep, that's also an alternative, though you'll have to make sure the
> > buffer passed through the nand_op_inst is DMA-safe, and use a bounce
> > buffer when that's not the case.
> >   
> ok, i will try dma here.

We should probably expose the bounce buf handling as generic helpers at
the rawnand level:

void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
{
	void *buf;

	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
		return NULL;

	if (virt_addr_valid(instr->data.in) &&
	    !object_is_on_stack(instr->data.buf.in))
		return instr->data.buf.in;

	return kzalloc(instr->data.len, GFP_KERNEL);
}

void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
				    void *buf)
{
	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
	    WARN_ON(!buf))
		return;

	if (buf == instr->data.buf.in)
		return;

	memcpy(instr->data.buf.in, buf, instr->data.len);
	kfree(buf);
}

const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
{
	void *buf;

	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
		return NULL;

	if (virt_addr_valid(instr->data.out) &&
	    !object_is_on_stack(instr->data.buf.out))
		return instr->data.buf.out;

	return kmemdup(instr->data.buf.out, GFP_KERNEL);
}

void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
				    void *buf)
{
	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
	    WARN_ON(!buf))
		return;

	if (buf != instr->data.buf.out)
		kfree(buf);
}

> > 
> > Isn't the controller engine able to wait on the data transfer to be
> > complete before sending the next instruction in the CMD FIFO pipe?
> > I'm pretty sure it's able to do that, which would make
> > meson_nfc_wait_dma_finish() useless, and all you'd have to do is wait
> > for the CMD FIFO to be empty (assuming it guarantees the last command
> > has been executed).
> > Maybe the nfc design is difference. dedicated nfc dma engine is   
> concatenated with the command fifo, there is no other status to check 
> whether dma is done.

No, I mean that internally a "DMA-transfer" instruction probably
forces the NAND controller to wait for the DMA transfer to be finished
before dequeuing/executing the next instruction. So, it should be safe
to queue the PROG and WAIT_RB instructions and just rely on the "FIFO
empty" event to consider the operation as finished. Then, all you'll
have to do is check the status reg to determine whether the
write operation succeeded or not.
Liang Yang Nov. 6, 2018, 11:08 a.m. UTC | #6
On 2018/11/6 18:22, Boris Brezillon wrote:
> On Tue, 6 Nov 2018 18:00:37 +0800
> Liang Yang <liang.yang@amlogic.com> wrote:
> 
>> On 2018/11/6 17:28, Boris Brezillon wrote:
>>> On Tue, 6 Nov 2018 17:08:00 +0800
>>> Liang Yang <liang.yang@amlogic.com> wrote:
>>>    
>>>> On 2018/11/5 23:53, Boris Brezillon wrote:
>>>>> On Fri, 2 Nov 2018 00:42:21 +0800
>>>>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
>>>>>       
>>>>>> +
>>>>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
>>>>>> +{
>>>>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
>>>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>>>> +	u32 cmd;
>>>>>> +
>>>>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
>>>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>>>> +
>>>>>> +	meson_nfc_drain_cmd(nfc);
>>>>>
>>>>> You probably don't want to drain the FIFO every time you read a byte on
>>>>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
>>>>> FIFO, right? If that's the case, you should queue as much DRD cmd as
>>>>> possible and only sync when the user explicitly requests it or when
>>>>> the INPUT/READ FIFO is full.
>>>>>       
>>>> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
>>>> nand cycle to read one byte and covers the 1st byte every time reading.
>>>> i think nfc controller is faster than nand cycle, but really it is not
>>>> high efficiency when reading so many bytes once.
>>>> Or use dma command here like read_page and read_page_raw.
>>>
>>> Yep, that's also an alternative, though you'll have to make sure the
>>> buffer passed through the nand_op_inst is DMA-safe, and use a bounce
>>> buffer when that's not the case.
>>>    
>> ok, i will try dma here.
> 
> We should probably expose the bounce buf handling as generic helpers at
> the rawnand level:
> 
> void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
> {
> 	void *buf;
> 
> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
> 		return NULL;
> 
> 	if (virt_addr_valid(instr->data.in) &&
> 	    !object_is_on_stack(instr->data.buf.in))
> 		return instr->data.buf.in;
> 
> 	return kzalloc(instr->data.len, GFP_KERNEL);
> }
> 
> void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
> 				    void *buf)
> {
> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
> 	    WARN_ON(!buf))
> 		return;
> 
> 	if (buf == instr->data.buf.in)
> 		return;
> 
> 	memcpy(instr->data.buf.in, buf, instr->data.len);
> 	kfree(buf);
> }
> 
> const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
> {
> 	void *buf;
> 
> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
> 		return NULL;
> 
> 	if (virt_addr_valid(instr->data.out) &&
> 	    !object_is_on_stack(instr->data.buf.out))
> 		return instr->data.buf.out;
> 
> 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
> }
> 
> void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
> 				    void *buf)
> {
> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
> 	    WARN_ON(!buf))
> 		return;
> 
> 	if (buf != instr->data.buf.out)
> 		kfree(buf);
> }
>

that is more convenient.
i will use meson_chip->databuf as the bounce mid-buffer now.

>>>
>>> Isn't the controller engine able to wait on the data transfer to be
>>> complete before sending the next instruction in the CMD FIFO pipe?
>>> I'm pretty sure it's able to do that, which would make
>>> meson_nfc_wait_dma_finish() useless, and all you'd have to do is wait
>>> for the CMD FIFO to be empty (assuming it guarantees the last command
>>> has been executed).
>>> Maybe the nfc design is difference. dedicated nfc dma engine is
>> concatenated with the command fifo, there is no other status to check
>> whether dma is done.
> 
> No, I mean that internally a "DMA-transfer" instruction probably
> forces the NAND controller to wait for the DMA transfer to be finished
> before dequeuing/executing the next instruction. So, it should be safe
> to queue the PROG and WAIT_RB instructions and just rely on the "FIFO
> empty" event to consider the operation as finished. Then, all you'll
> have to do is check the status reg to determine whether the
> write operation succeeded or not.
> 
em, i got the point. indeed, until dma is checked done, nfc will execute 
next command in the command queue. so i will consider it.

> .
>
Boris Brezillon Nov. 6, 2018, 4:16 p.m. UTC | #7
On Tue, 6 Nov 2018 19:08:27 +0800
Liang Yang <liang.yang@amlogic.com> wrote:

> On 2018/11/6 18:22, Boris Brezillon wrote:
> > On Tue, 6 Nov 2018 18:00:37 +0800
> > Liang Yang <liang.yang@amlogic.com> wrote:
> >   
> >> On 2018/11/6 17:28, Boris Brezillon wrote:  
> >>> On Tue, 6 Nov 2018 17:08:00 +0800
> >>> Liang Yang <liang.yang@amlogic.com> wrote:
> >>>      
> >>>> On 2018/11/5 23:53, Boris Brezillon wrote:  
> >>>>> On Fri, 2 Nov 2018 00:42:21 +0800
> >>>>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
> >>>>>         
> >>>>>> +
> >>>>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
> >>>>>> +{
> >>>>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
> >>>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> >>>>>> +	u32 cmd;
> >>>>>> +
> >>>>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
> >>>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> >>>>>> +
> >>>>>> +	meson_nfc_drain_cmd(nfc);  
> >>>>>
> >>>>> You probably don't want to drain the FIFO every time you read a byte on
> >>>>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
> >>>>> FIFO, right? If that's the case, you should queue as much DRD cmd as
> >>>>> possible and only sync when the user explicitly requests it or when
> >>>>> the INPUT/READ FIFO is full.
> >>>>>         
> >>>> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
> >>>> nand cycle to read one byte and covers the 1st byte every time reading.
> >>>> i think nfc controller is faster than nand cycle, but really it is not
> >>>> high efficiency when reading so many bytes once.
> >>>> Or use dma command here like read_page and read_page_raw.  
> >>>
> >>> Yep, that's also an alternative, though you'll have to make sure the
> >>> buffer passed through the nand_op_inst is DMA-safe, and use a bounce
> >>> buffer when that's not the case.
> >>>      
> >> ok, i will try dma here.  
> > 
> > We should probably expose the bounce buf handling as generic helpers at
> > the rawnand level:
> > 
> > void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
> > {
> > 	void *buf;
> > 
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
> > 		return NULL;
> > 
> > 	if (virt_addr_valid(instr->data.in) &&
> > 	    !object_is_on_stack(instr->data.buf.in))
> > 		return instr->data.buf.in;
> > 
> > 	return kzalloc(instr->data.len, GFP_KERNEL);
> > }
> > 
> > void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
> > 				    void *buf)
> > {
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
> > 	    WARN_ON(!buf))
> > 		return;
> > 
> > 	if (buf == instr->data.buf.in)
> > 		return;
> > 
> > 	memcpy(instr->data.buf.in, buf, instr->data.len);
> > 	kfree(buf);
> > }
> > 
> > const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
> > {
> > 	void *buf;
> > 
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
> > 		return NULL;
> > 
> > 	if (virt_addr_valid(instr->data.out) &&
> > 	    !object_is_on_stack(instr->data.buf.out))
> > 		return instr->data.buf.out;
> > 
> > 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
> > }
> > 
> > void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
> > 				    void *buf)
> > {
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
> > 	    WARN_ON(!buf))
> > 		return;
> > 
> > 	if (buf != instr->data.buf.out)
> > 		kfree(buf);
> > }
> >  
> 
> that is more convenient.
> i will use meson_chip->databuf as the bounce mid-buffer now.

It won't work: the bounce buffer is allocated after the detection, and
the detection code is calling ->exec_op().

Just add a new patch to you series adding these helpers to nand_base.c.
Liang Yang Nov. 7, 2018, 2:13 a.m. UTC | #8
On 2018/11/7 0:16, Boris Brezillon wrote:
> On Tue, 6 Nov 2018 19:08:27 +0800
> Liang Yang <liang.yang@amlogic.com> wrote:
> 
>> On 2018/11/6 18:22, Boris Brezillon wrote:
>>> On Tue, 6 Nov 2018 18:00:37 +0800
>>> Liang Yang <liang.yang@amlogic.com> wrote:
>>>    
>>>> On 2018/11/6 17:28, Boris Brezillon wrote:
>>>>> On Tue, 6 Nov 2018 17:08:00 +0800
>>>>> Liang Yang <liang.yang@amlogic.com> wrote:
>>>>>       
>>>>>> On 2018/11/5 23:53, Boris Brezillon wrote:
>>>>>>> On Fri, 2 Nov 2018 00:42:21 +0800
>>>>>>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
>>>>>>>          
>>>>>>>> +
>>>>>>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
>>>>>>>> +{
>>>>>>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
>>>>>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>>>>>> +	u32 cmd;
>>>>>>>> +
>>>>>>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
>>>>>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>>>>>> +
>>>>>>>> +	meson_nfc_drain_cmd(nfc);
>>>>>>>
>>>>>>> You probably don't want to drain the FIFO every time you read a byte on
>>>>>>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
>>>>>>> FIFO, right? If that's the case, you should queue as much DRD cmd as
>>>>>>> possible and only sync when the user explicitly requests it or when
>>>>>>> the INPUT/READ FIFO is full.
>>>>>>>          
>>>>>> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
>>>>>> nand cycle to read one byte and covers the 1st byte every time reading.
>>>>>> i think nfc controller is faster than nand cycle, but really it is not
>>>>>> high efficiency when reading so many bytes once.
>>>>>> Or use dma command here like read_page and read_page_raw.
>>>>>
>>>>> Yep, that's also an alternative, though you'll have to make sure the
>>>>> buffer passed through the nand_op_inst is DMA-safe, and use a bounce
>>>>> buffer when that's not the case.
>>>>>       
>>>> ok, i will try dma here.
>>>
>>> We should probably expose the bounce buf handling as generic helpers at
>>> the rawnand level:
>>>
>>> void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
>>> {
>>> 	void *buf;
>>>
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
>>> 		return NULL;
>>>
>>> 	if (virt_addr_valid(instr->data.in) &&
>>> 	    !object_is_on_stack(instr->data.buf.in))
>>> 		return instr->data.buf.in;
>>>
>>> 	return kzalloc(instr->data.len, GFP_KERNEL);
>>> }
>>>
>>> void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
>>> 				    void *buf)
>>> {
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
>>> 	    WARN_ON(!buf))
>>> 		return;
>>>
>>> 	if (buf == instr->data.buf.in)
>>> 		return;
>>>
>>> 	memcpy(instr->data.buf.in, buf, instr->data.len);
>>> 	kfree(buf);
>>> }
>>>
>>> const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
>>> {
>>> 	void *buf;
>>>
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
>>> 		return NULL;
>>>
>>> 	if (virt_addr_valid(instr->data.out) &&
>>> 	    !object_is_on_stack(instr->data.buf.out))
>>> 		return instr->data.buf.out;
>>>
>>> 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
>>> }
>>>
>>> void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
>>> 				    void *buf)
>>> {
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
>>> 	    WARN_ON(!buf))
>>> 		return;
>>>
>>> 	if (buf != instr->data.buf.out)
>>> 		kfree(buf);
>>> }
>>>   
>>
>> that is more convenient.
>> i will use meson_chip->databuf as the bounce mid-buffer now.
> 
> It won't work: the bounce buffer is allocated after the detection, and
> the detection code is calling ->exec_op().
> 
> Just add a new patch to you series adding these helpers to nand_base.c.
>

ok

> .
>
Liang Yang Nov. 8, 2018, 7:41 a.m. UTC | #9
On 2018/11/7 0:16, Boris Brezillon wrote:
> On Tue, 6 Nov 2018 19:08:27 +0800
> Liang Yang <liang.yang@amlogic.com> wrote:
> 
>> On 2018/11/6 18:22, Boris Brezillon wrote:
>>> On Tue, 6 Nov 2018 18:00:37 +0800
>>> Liang Yang <liang.yang@amlogic.com> wrote:
>>>    
>>>> On 2018/11/6 17:28, Boris Brezillon wrote:
>>>>> On Tue, 6 Nov 2018 17:08:00 +0800
>>>>> Liang Yang <liang.yang@amlogic.com> wrote:
>>>>>       
>>>>>> On 2018/11/5 23:53, Boris Brezillon wrote:
>>>>>>> On Fri, 2 Nov 2018 00:42:21 +0800
>>>>>>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
>>>>>>>          
>>>>>>>> +
>>>>>>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
>>>>>>>> +{
>>>>>>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
>>>>>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>>>>>> +	u32 cmd;
>>>>>>>> +
>>>>>>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
>>>>>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>>>>>> +
>>>>>>>> +	meson_nfc_drain_cmd(nfc);
>>>>>>>
>>>>>>> You probably don't want to drain the FIFO every time you read a byte on
>>>>>>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
>>>>>>> FIFO, right? If that's the case, you should queue as much DRD cmd as
>>>>>>> possible and only sync when the user explicitly requests it or when
>>>>>>> the INPUT/READ FIFO is full.
>>>>>>>          
>>>>>> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
>>>>>> nand cycle to read one byte and covers the 1st byte every time reading.
>>>>>> i think nfc controller is faster than nand cycle, but really it is not
>>>>>> high efficiency when reading so many bytes once.
>>>>>> Or use dma command here like read_page and read_page_raw.
>>>>>
>>>>> Yep, that's also an alternative, though you'll have to make sure the
>>>>> buffer passed through the nand_op_inst is DMA-safe, and use a bounce
>>>>> buffer when that's not the case.
>>>>>       
>>>> ok, i will try dma here.
>>>
>>> We should probably expose the bounce buf handling as generic helpers at
>>> the rawnand level:
>>>
>>> void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
>>> {
>>> 	void *buf;
>>>
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
>>> 		return NULL;
>>>
>>> 	if (virt_addr_valid(instr->data.in) &&
>>> 	    !object_is_on_stack(instr->data.buf.in))
>>> 		return instr->data.buf.in;
>>>
>>> 	return kzalloc(instr->data.len, GFP_KERNEL);
>>> }
>>>
>>> void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
>>> 				    void *buf)
>>> {
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
>>> 	    WARN_ON(!buf))
>>> 		return;
>>>
>>> 	if (buf == instr->data.buf.in)
>>> 		return;
>>>
>>> 	memcpy(instr->data.buf.in, buf, instr->data.len);
>>> 	kfree(buf);
>>> }
>>>
>>> const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
>>> {
>>> 	void *buf;
>>>
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
>>> 		return NULL;
>>>
>>> 	if (virt_addr_valid(instr->data.out) &&
>>> 	    !object_is_on_stack(instr->data.buf.out))
>>> 		return instr->data.buf.out;
>>>
>>> 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
>>> }
>>>
>>> void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
>>> 				    void *buf)
>>> {
>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
>>> 	    WARN_ON(!buf))
>>> 		return;
>>>
>>> 	if (buf != instr->data.buf.out)
>>> 		kfree(buf);
>>> }
>>>   
>>
>> that is more convenient.
>> i will use meson_chip->databuf as the bounce mid-buffer now.
> 
> It won't work: the bounce buffer is allocated after the detection, and
> the detection code is calling ->exec_op().
> 
> Just add a new patch to you series adding these helpers to nand_base.c.
> 

when using these helpers, i finally find that i still need a *info 
buffer* which is used to get status and ecc counter. even i don't need
to check *info buffer*, it is still needed. i just malloc *info_buffer* 
in driver now. and then i think some dedicated dma may need a minimum 
size(such as 4 bytes). it is luckly that meson nfc is not limited about 
the minimum size and i tested it.
so what is your suggestion about it?

> .
>
Miquel Raynal Nov. 12, 2018, 4:13 p.m. UTC | #10
Hello,

Boris Brezillon <boris.brezillon@bootlin.com> wrote on Tue, 6 Nov 2018
11:22:06 +0100:

> On Tue, 6 Nov 2018 18:00:37 +0800
> Liang Yang <liang.yang@amlogic.com> wrote:
> 
> > On 2018/11/6 17:28, Boris Brezillon wrote:  
> > > On Tue, 6 Nov 2018 17:08:00 +0800
> > > Liang Yang <liang.yang@amlogic.com> wrote:
> > >     
> > >> On 2018/11/5 23:53, Boris Brezillon wrote:    
> > >>> On Fri, 2 Nov 2018 00:42:21 +0800
> > >>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
> > >>>        
> > >>>> +
> > >>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
> > >>>> +{
> > >>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
> > >>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> > >>>> +	u32 cmd;
> > >>>> +
> > >>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
> > >>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> > >>>> +
> > >>>> +	meson_nfc_drain_cmd(nfc);    
> > >>>
> > >>> You probably don't want to drain the FIFO every time you read a byte on
> > >>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
> > >>> FIFO, right? If that's the case, you should queue as much DRD cmd as
> > >>> possible and only sync when the user explicitly requests it or when
> > >>> the INPUT/READ FIFO is full.
> > >>>        
> > >> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
> > >> nand cycle to read one byte and covers the 1st byte every time reading.
> > >> i think nfc controller is faster than nand cycle, but really it is not
> > >> high efficiency when reading so many bytes once.
> > >> Or use dma command here like read_page and read_page_raw.    
> > > 
> > > Yep, that's also an alternative, though you'll have to make sure the
> > > buffer passed through the nand_op_inst is DMA-safe, and use a bounce
> > > buffer when that's not the case.
> > >     
> > ok, i will try dma here.  
> 
> We should probably expose the bounce buf handling as generic helpers at
> the rawnand level:
> 
> void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
> {
> 	void *buf;
> 
> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
> 		return NULL;
> 
> 	if (virt_addr_valid(instr->data.in) &&
> 	    !object_is_on_stack(instr->data.buf.in))
> 		return instr->data.buf.in;
> 
> 	return kzalloc(instr->data.len, GFP_KERNEL);
> }
> 
> void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
> 				    void *buf)
> {
> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
> 	    WARN_ON(!buf))
> 		return;
> 
> 	if (buf == instr->data.buf.in)
> 		return;
> 
> 	memcpy(instr->data.buf.in, buf, instr->data.len);
> 	kfree(buf);
> }
> 
> const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
> {
> 	void *buf;
> 
> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
> 		return NULL;
> 
> 	if (virt_addr_valid(instr->data.out) &&
> 	    !object_is_on_stack(instr->data.buf.out))
> 		return instr->data.buf.out;
> 
> 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
> }
> 
> void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
> 				    void *buf)
> {
> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
> 	    WARN_ON(!buf))
> 		return;
> 
> 	if (buf != instr->data.buf.out)
> 		kfree(buf);
> }

Not that I am against such function, but maybe they should come with
comments stating that there is no reliable way to find if a buffer is
DMA-able at runtime and these are just sanity checks (ie. required, but
probably not enough). This is my understanding of Wolfram's recent talk
at ELCE [1]. I suppose using the CONFIG_DMA_API_DEBUG option could help
more reliably to find such issues.

[1] https://www.youtube.com/watch?v=JDwaMClvV-s

Thanks,
Miquèl
Boris Brezillon Nov. 12, 2018, 4:54 p.m. UTC | #11
+Wolfram to give some inputs on the DMA issue.

On Mon, 12 Nov 2018 17:13:51 +0100
Miquel Raynal <miquel.raynal@bootlin.com> wrote:

> Hello,
> 
> Boris Brezillon <boris.brezillon@bootlin.com> wrote on Tue, 6 Nov 2018
> 11:22:06 +0100:
> 
> > On Tue, 6 Nov 2018 18:00:37 +0800
> > Liang Yang <liang.yang@amlogic.com> wrote:
> >   
> > > On 2018/11/6 17:28, Boris Brezillon wrote:    
> > > > On Tue, 6 Nov 2018 17:08:00 +0800
> > > > Liang Yang <liang.yang@amlogic.com> wrote:
> > > >       
> > > >> On 2018/11/5 23:53, Boris Brezillon wrote:      
> > > >>> On Fri, 2 Nov 2018 00:42:21 +0800
> > > >>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
> > > >>>          
> > > >>>> +
> > > >>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
> > > >>>> +{
> > > >>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
> > > >>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> > > >>>> +	u32 cmd;
> > > >>>> +
> > > >>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
> > > >>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> > > >>>> +
> > > >>>> +	meson_nfc_drain_cmd(nfc);      
> > > >>>
> > > >>> You probably don't want to drain the FIFO every time you read a byte on
> > > >>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
> > > >>> FIFO, right? If that's the case, you should queue as much DRD cmd as
> > > >>> possible and only sync when the user explicitly requests it or when
> > > >>> the INPUT/READ FIFO is full.
> > > >>>          
> > > >> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
> > > >> nand cycle to read one byte and covers the 1st byte every time reading.
> > > >> i think nfc controller is faster than nand cycle, but really it is not
> > > >> high efficiency when reading so many bytes once.
> > > >> Or use dma command here like read_page and read_page_raw.      
> > > > 
> > > > Yep, that's also an alternative, though you'll have to make sure the
> > > > buffer passed through the nand_op_inst is DMA-safe, and use a bounce
> > > > buffer when that's not the case.
> > > >       
> > > ok, i will try dma here.    
> > 
> > We should probably expose the bounce buf handling as generic helpers at
> > the rawnand level:
> > 
> > void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
> > {
> > 	void *buf;
> > 
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
> > 		return NULL;
> > 
> > 	if (virt_addr_valid(instr->data.in) &&
> > 	    !object_is_on_stack(instr->data.buf.in))
> > 		return instr->data.buf.in;
> > 
> > 	return kzalloc(instr->data.len, GFP_KERNEL);
> > }
> > 
> > void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
> > 				    void *buf)
> > {
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
> > 	    WARN_ON(!buf))
> > 		return;
> > 
> > 	if (buf == instr->data.buf.in)
> > 		return;
> > 
> > 	memcpy(instr->data.buf.in, buf, instr->data.len);
> > 	kfree(buf);
> > }
> > 
> > const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
> > {
> > 	void *buf;
> > 
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
> > 		return NULL;
> > 
> > 	if (virt_addr_valid(instr->data.out) &&
> > 	    !object_is_on_stack(instr->data.buf.out))
> > 		return instr->data.buf.out;
> > 
> > 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
> > }
> > 
> > void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
> > 				    void *buf)
> > {
> > 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
> > 	    WARN_ON(!buf))
> > 		return;
> > 
> > 	if (buf != instr->data.buf.out)
> > 		kfree(buf);
> > }  
> 
> Not that I am against such function, but maybe they should come with
> comments stating that there is no reliable way to find if a buffer is
> DMA-able at runtime and these are just sanity checks (ie. required, but
> probably not enough).

It's not 100% reliable, but it should cover most cases. Note that the
NAND framework already uses virt_addr_valid() to decide when to use its
internal bounce buffer, so this should be fixed too if we want a fully
reliable solution.

> This is my understanding of Wolfram's recent talk
> at ELCE [1].

Yes, you're right, but the NAND framework does not provide any guarantee
on the buf passed to ->exec_op() simply because the MTD layer does not
provide such a guarantee. Reworking that to match how the i2c framework
handles it is possible (with a flag set when the buffer is known to be
DMA-safe), but it requires rewriting all MTD users if we want to keep
decent perfs (the amount of data transfered to a flash is an order of
magnitude bigger than what you usually receive/send from/to an I2C
device). Also, I'm not even sure the DMA_SAFE flag covers all weird
cases like the "DMA engine embedded in the controller is not able to
access the whole physical memory range" one. So ideally we should have
something that checks if a pointer is DMA-safe at the device level and
then at the arch level.

A temporary solution would be to add a hook at the nand_controller
level:

	bool (*buf_is_dma_safe)(struct nand_chip *chip, void *buf,
				size_t len);

And then fallback to the default implementation when it's not
implemented:

static bool nand_buf_is_dma_safe(struct nand_chip *chip, void *buf,
				 size_t len)
{
	if (chip->controller->ops && chip->controller->ops->is_dma_safe)
		return chip->controller->ops->is_dma_safe(chip, buf,
							  len);

	return virt_addr_valid(buf) && !object_is_on_stack(buf);
}

> I suppose using the CONFIG_DMA_API_DEBUG option could help
> more reliably to find such issues.

Actually, the problem is not only about detecting offenders but being
able to detect when a buffer is not DMA-safe at runtime in order to
allocate/use a bounce buffer.
Boris Brezillon Nov. 12, 2018, 5:45 p.m. UTC | #12
On Mon, 12 Nov 2018 17:54:16 +0100
Boris Brezillon <boris.brezillon@bootlin.com> wrote:

> +Wolfram to give some inputs on the DMA issue.
> 
> On Mon, 12 Nov 2018 17:13:51 +0100
> Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> 
> > Hello,
> > 
> > Boris Brezillon <boris.brezillon@bootlin.com> wrote on Tue, 6 Nov 2018
> > 11:22:06 +0100:
> >   
> > > On Tue, 6 Nov 2018 18:00:37 +0800
> > > Liang Yang <liang.yang@amlogic.com> wrote:
> > >     
> > > > On 2018/11/6 17:28, Boris Brezillon wrote:      
> > > > > On Tue, 6 Nov 2018 17:08:00 +0800
> > > > > Liang Yang <liang.yang@amlogic.com> wrote:
> > > > >         
> > > > >> On 2018/11/5 23:53, Boris Brezillon wrote:        
> > > > >>> On Fri, 2 Nov 2018 00:42:21 +0800
> > > > >>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
> > > > >>>            
> > > > >>>> +
> > > > >>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
> > > > >>>> +{
> > > > >>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
> > > > >>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
> > > > >>>> +	u32 cmd;
> > > > >>>> +
> > > > >>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
> > > > >>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
> > > > >>>> +
> > > > >>>> +	meson_nfc_drain_cmd(nfc);        
> > > > >>>
> > > > >>> You probably don't want to drain the FIFO every time you read a byte on
> > > > >>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
> > > > >>> FIFO, right? If that's the case, you should queue as much DRD cmd as
> > > > >>> possible and only sync when the user explicitly requests it or when
> > > > >>> the INPUT/READ FIFO is full.
> > > > >>>            
> > > > >> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
> > > > >> nand cycle to read one byte and covers the 1st byte every time reading.
> > > > >> i think nfc controller is faster than nand cycle, but really it is not
> > > > >> high efficiency when reading so many bytes once.
> > > > >> Or use dma command here like read_page and read_page_raw.        
> > > > > 
> > > > > Yep, that's also an alternative, though you'll have to make sure the
> > > > > buffer passed through the nand_op_inst is DMA-safe, and use a bounce
> > > > > buffer when that's not the case.
> > > > >         
> > > > ok, i will try dma here.      
> > > 
> > > We should probably expose the bounce buf handling as generic helpers at
> > > the rawnand level:
> > > 
> > > void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
> > > {
> > > 	void *buf;
> > > 
> > > 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
> > > 		return NULL;
> > > 
> > > 	if (virt_addr_valid(instr->data.in) &&
> > > 	    !object_is_on_stack(instr->data.buf.in))
> > > 		return instr->data.buf.in;
> > > 
> > > 	return kzalloc(instr->data.len, GFP_KERNEL);
> > > }
> > > 
> > > void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
> > > 				    void *buf)
> > > {
> > > 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
> > > 	    WARN_ON(!buf))
> > > 		return;
> > > 
> > > 	if (buf == instr->data.buf.in)
> > > 		return;
> > > 
> > > 	memcpy(instr->data.buf.in, buf, instr->data.len);
> > > 	kfree(buf);
> > > }
> > > 
> > > const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
> > > {
> > > 	void *buf;
> > > 
> > > 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
> > > 		return NULL;
> > > 
> > > 	if (virt_addr_valid(instr->data.out) &&
> > > 	    !object_is_on_stack(instr->data.buf.out))
> > > 		return instr->data.buf.out;
> > > 
> > > 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
> > > }
> > > 
> > > void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
> > > 				    void *buf)
> > > {
> > > 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
> > > 	    WARN_ON(!buf))
> > > 		return;
> > > 
> > > 	if (buf != instr->data.buf.out)
> > > 		kfree(buf);
> > > }    
> > 
> > Not that I am against such function, but maybe they should come with
> > comments stating that there is no reliable way to find if a buffer is
> > DMA-able at runtime and these are just sanity checks (ie. required, but
> > probably not enough).  
> 
> It's not 100% reliable, but it should cover most cases. Note that the
> NAND framework already uses virt_addr_valid() to decide when to use its
> internal bounce buffer, so this should be fixed too if we want a fully
> reliable solution.
> 
> > This is my understanding of Wolfram's recent talk
> > at ELCE [1].  
> 
> Yes, you're right, but the NAND framework does not provide any guarantee
> on the buf passed to ->exec_op() simply because the MTD layer does not
> provide such a guarantee. Reworking that to match how the i2c framework
> handles it is possible (with a flag set when the buffer is known to be
> DMA-safe), but it requires rewriting all MTD users if we want to keep
> decent perfs (the amount of data transfered to a flash is an order of
> magnitude bigger than what you usually receive/send from/to an I2C
> device). Also, I'm not even sure the DMA_SAFE flag covers all weird
> cases like the "DMA engine embedded in the controller is not able to
> access the whole physical memory range" one.

I forgot that this problem was handled at dma_map time (a bounce
buffer is allocated if needed, and this decision is based on
dev->dma_mask).

> So ideally we should have
> something that checks if a pointer is DMA-safe at the device level and
> then at the arch level.
> 
> A temporary solution would be to add a hook at the nand_controller
> level:
> 
> 	bool (*buf_is_dma_safe)(struct nand_chip *chip, void *buf,
> 				size_t len);
> 
> And then fallback to the default implementation when it's not
> implemented:
> 
> static bool nand_buf_is_dma_safe(struct nand_chip *chip, void *buf,
> 				 size_t len)
> {
> 	if (chip->controller->ops && chip->controller->ops->is_dma_safe)
> 		return chip->controller->ops->is_dma_safe(chip, buf,
> 							  len);
> 
> 	return virt_addr_valid(buf) && !object_is_on_stack(buf);
> }
> 
> > I suppose using the CONFIG_DMA_API_DEBUG option could help
> > more reliably to find such issues.  
> 
> Actually, the problem is not only about detecting offenders but being
> able to detect when a buffer is not DMA-safe at runtime in order to
> allocate/use a bounce buffer.
Liang Yang Nov. 15, 2018, 11:25 a.m. UTC | #13
Hi Boris,

I have implemented dma access base on these helpers you provided below.
we prepare to send v7 version now, so when will these helpers be pushed?

On 2018/11/13 1:45, Boris Brezillon wrote:
> On Mon, 12 Nov 2018 17:54:16 +0100
> Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> 
>> +Wolfram to give some inputs on the DMA issue.
>>
>> On Mon, 12 Nov 2018 17:13:51 +0100
>> Miquel Raynal <miquel.raynal@bootlin.com> wrote:
>>
>>> Hello,
>>>
>>> Boris Brezillon <boris.brezillon@bootlin.com> wrote on Tue, 6 Nov 2018
>>> 11:22:06 +0100:
>>>    
>>>> On Tue, 6 Nov 2018 18:00:37 +0800
>>>> Liang Yang <liang.yang@amlogic.com> wrote:
>>>>      
>>>>> On 2018/11/6 17:28, Boris Brezillon wrote:
>>>>>> On Tue, 6 Nov 2018 17:08:00 +0800
>>>>>> Liang Yang <liang.yang@amlogic.com> wrote:
>>>>>>          
>>>>>>> On 2018/11/5 23:53, Boris Brezillon wrote:
>>>>>>>> On Fri, 2 Nov 2018 00:42:21 +0800
>>>>>>>> Jianxin Pan <jianxin.pan@amlogic.com> wrote:
>>>>>>>>             
>>>>>>>>> +
>>>>>>>>> +static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
>>>>>>>>> +{
>>>>>>>>> +	struct nand_chip *nand = mtd_to_nand(mtd);
>>>>>>>>> +	struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>>>>>>> +	u32 cmd;
>>>>>>>>> +
>>>>>>>>> +	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
>>>>>>>>> +	writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>> +
>>>>>>>>> +	meson_nfc_drain_cmd(nfc);
>>>>>>>>
>>>>>>>> You probably don't want to drain the FIFO every time you read a byte on
>>>>>>>> the bus, and I guess the INPUT FIFO is at least as big as the CMD
>>>>>>>> FIFO, right? If that's the case, you should queue as much DRD cmd as
>>>>>>>> possible and only sync when the user explicitly requests it or when
>>>>>>>> the INPUT/READ FIFO is full.
>>>>>>>>             
>>>>>>> Register 'NFC_REG_BUF' can holds only 4 bytes, also DRD sends only one
>>>>>>> nand cycle to read one byte and covers the 1st byte every time reading.
>>>>>>> i think nfc controller is faster than nand cycle, but really it is not
>>>>>>> high efficiency when reading so many bytes once.
>>>>>>> Or use dma command here like read_page and read_page_raw.
>>>>>>
>>>>>> Yep, that's also an alternative, though you'll have to make sure the
>>>>>> buffer passed through the nand_op_inst is DMA-safe, and use a bounce
>>>>>> buffer when that's not the case.
>>>>>>          
>>>>> ok, i will try dma here.
>>>>
>>>> We should probably expose the bounce buf handling as generic helpers at
>>>> the rawnand level:
>>>>
>>>> void *nand_op_get_dma_safe_input_buf(struct nand_op_instr *instr)
>>>> {
>>>> 	void *buf;
>>>>
>>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR))
>>>> 		return NULL;
>>>>
>>>> 	if (virt_addr_valid(instr->data.in) &&
>>>> 	    !object_is_on_stack(instr->data.buf.in))
>>>> 		return instr->data.buf.in;
>>>>
>>>> 	return kzalloc(instr->data.len, GFP_KERNEL);
>>>> }
>>>>
>>>> void nand_op_put_dma_safe_input_buf(struct nand_op_instr *instr,
>>>> 				    void *buf)
>>>> {
>>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_IN_INSTR) ||
>>>> 	    WARN_ON(!buf))
>>>> 		return;
>>>>
>>>> 	if (buf == instr->data.buf.in)
>>>> 		return;
>>>>
>>>> 	memcpy(instr->data.buf.in, buf, instr->data.len);
>>>> 	kfree(buf);
>>>> }
>>>>
>>>> const void *nand_op_get_dma_safe_output_buf(struct nand_op_instr *instr)
>>>> {
>>>> 	void *buf;
>>>>
>>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR))
>>>> 		return NULL;
>>>>
>>>> 	if (virt_addr_valid(instr->data.out) &&
>>>> 	    !object_is_on_stack(instr->data.buf.out))
>>>> 		return instr->data.buf.out;
>>>>
>>>> 	return kmemdup(instr->data.buf.out, GFP_KERNEL);
>>>> }
>>>>
>>>> void nand_op_put_dma_safe_output_buf(struct nand_op_instr *instr,
>>>> 				    void *buf)
>>>> {
>>>> 	if (WARN_ON(instr->type != NAND_OP_DATA_OUT_INSTR) ||
>>>> 	    WARN_ON(!buf))
>>>> 		return;
>>>>
>>>> 	if (buf != instr->data.buf.out)
>>>> 		kfree(buf);
>>>> }
>>>
>>> Not that I am against such function, but maybe they should come with
>>> comments stating that there is no reliable way to find if a buffer is
>>> DMA-able at runtime and these are just sanity checks (ie. required, but
>>> probably not enough).
>>
>> It's not 100% reliable, but it should cover most cases. Note that the
>> NAND framework already uses virt_addr_valid() to decide when to use its
>> internal bounce buffer, so this should be fixed too if we want a fully
>> reliable solution.
>>
>>> This is my understanding of Wolfram's recent talk
>>> at ELCE [1].
>>
>> Yes, you're right, but the NAND framework does not provide any guarantee
>> on the buf passed to ->exec_op() simply because the MTD layer does not
>> provide such a guarantee. Reworking that to match how the i2c framework
>> handles it is possible (with a flag set when the buffer is known to be
>> DMA-safe), but it requires rewriting all MTD users if we want to keep
>> decent perfs (the amount of data transfered to a flash is an order of
>> magnitude bigger than what you usually receive/send from/to an I2C
>> device). Also, I'm not even sure the DMA_SAFE flag covers all weird
>> cases like the "DMA engine embedded in the controller is not able to
>> access the whole physical memory range" one.
> 
> I forgot that this problem was handled at dma_map time (a bounce
> buffer is allocated if needed, and this decision is based on
> dev->dma_mask).
> 
>> So ideally we should have
>> something that checks if a pointer is DMA-safe at the device level and
>> then at the arch level.
>>
>> A temporary solution would be to add a hook at the nand_controller
>> level:
>>
>> 	bool (*buf_is_dma_safe)(struct nand_chip *chip, void *buf,
>> 				size_t len);
>>
>> And then fallback to the default implementation when it's not
>> implemented:
>>
>> static bool nand_buf_is_dma_safe(struct nand_chip *chip, void *buf,
>> 				 size_t len)
>> {
>> 	if (chip->controller->ops && chip->controller->ops->is_dma_safe)
>> 		return chip->controller->ops->is_dma_safe(chip, buf,
>> 							  len);
>>
>> 	return virt_addr_valid(buf) && !object_is_on_stack(buf);
>> }
>>
>>> I suppose using the CONFIG_DMA_API_DEBUG option could help
>>> more reliably to find such issues.
>>
>> Actually, the problem is not only about detecting offenders but being
>> able to detect when a buffer is not DMA-safe at runtime in order to
>> allocate/use a bounce buffer.
> 
> .
>
Miquel Raynal Nov. 15, 2018, 1:04 p.m. UTC | #14
Hi Liang,

Liang Yang <liang.yang@amlogic.com> wrote on Thu, 15 Nov 2018 19:25:07
+0800:

> Hi Boris,
> 
> I have implemented dma access base on these helpers you provided below.
> we prepare to send v7 version now, so when will these helpers be pushed?

Thanks for your work. You can send the v7 so we will have a look at the
overall driver; but since we raised the DMA buffers issue we had a
discussion with Boris about how to handle them and I think we are going
to adopt the same solution as Wolfram in the I2C subsystem: manual
flagging. Sadly, this is probably the best we can do to ensure proper
DMA support.

There is nothing set is stone yet but I started a small rework to
handle MTD operations differently (and add a DMA_SAFE flag), you can
have a look there [1]. Don't base your work on it for now as it is just
a preliminary version, subject to big changes.

[1] https://github.com/miquelraynal/linux/commits/dma-safe-buffers

Thanks,
Miquèl
Boris Brezillon Nov. 15, 2018, 1:09 p.m. UTC | #15
On Thu, 15 Nov 2018 14:04:00 +0100
Miquel Raynal <miquel.raynal@bootlin.com> wrote:

> Hi Liang,
> 
> Liang Yang <liang.yang@amlogic.com> wrote on Thu, 15 Nov 2018 19:25:07
> +0800:
> 
> > Hi Boris,
> > 
> > I have implemented dma access base on these helpers you provided below.
> > we prepare to send v7 version now, so when will these helpers be pushed?  
> 
> Thanks for your work. You can send the v7 so we will have a look at the
> overall driver; but since we raised the DMA buffers issue we had a
> discussion with Boris about how to handle them and I think we are going
> to adopt the same solution as Wolfram in the I2C subsystem: manual
> flagging. Sadly, this is probably the best we can do to ensure proper
> DMA support.
> 
> There is nothing set is stone yet but I started a small rework to
> handle MTD operations differently (and add a DMA_SAFE flag), you can
> have a look there [1]. Don't base your work on it for now as it is just
> a preliminary version, subject to big changes.

In order to not block the driver, I'd suggest that you move the helper
I proposed directly into your driver and prefix them with 'meson_'.
Liang Yang Nov. 16, 2018, 8:29 a.m. UTC | #16
Hi Boris and Miquel,

understand. i will move helpers into nfc driver to avoid some errors 
when sending the patch.

On 2018/11/15 21:09, Boris Brezillon wrote:
> On Thu, 15 Nov 2018 14:04:00 +0100
> Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> 
>> Hi Liang,
>>
>> Liang Yang <liang.yang@amlogic.com> wrote on Thu, 15 Nov 2018 19:25:07
>> +0800:
>>
>>> Hi Boris,
>>>
>>> I have implemented dma access base on these helpers you provided below.
>>> we prepare to send v7 version now, so when will these helpers be pushed?
>>
>> Thanks for your work. You can send the v7 so we will have a look at the
>> overall driver; but since we raised the DMA buffers issue we had a
>> discussion with Boris about how to handle them and I think we are going
>> to adopt the same solution as Wolfram in the I2C subsystem: manual
>> flagging. Sadly, this is probably the best we can do to ensure proper
>> DMA support.
>>
>> There is nothing set is stone yet but I started a small rework to
>> handle MTD operations differently (and add a DMA_SAFE flag), you can
>> have a look there [1]. Don't base your work on it for now as it is just
>> a preliminary version, subject to big changes.
> 
> In order to not block the driver, I'd suggest that you move the helper
> I proposed directly into your driver and prefix them with 'meson_'.
> 
> .
>
diff mbox series

Patch

diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig
index c7efc31..223b041 100644
--- a/drivers/mtd/nand/raw/Kconfig
+++ b/drivers/mtd/nand/raw/Kconfig
@@ -541,4 +541,14 @@  config MTD_NAND_TEGRA
 	  is supported. Extra OOB bytes when using HW ECC are currently
 	  not supported.
 
+config MTD_NAND_MESON
+	tristate "Support for NAND controller on Amlogic's Meson SoCs"
+	depends on ARCH_MESON || COMPILE_TEST
+	depends on COMMON_CLK_AMLOGIC
+	select COMMON_CLK_REGMAP_MESON
+	select MFD_SYSCON
+	help
+	  Enables support for NAND controller on Amlogic's Meson SoCs.
+	  This controller is found on Meson GXBB, GXL, AXG SoCs.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile
index 57159b3..a2cc2fe 100644
--- a/drivers/mtd/nand/raw/Makefile
+++ b/drivers/mtd/nand/raw/Makefile
@@ -56,6 +56,7 @@  obj-$(CONFIG_MTD_NAND_BRCMNAND)		+= brcmnand/
 obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_ecc.o mtk_nand.o
 obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
+obj-$(CONFIG_MTD_NAND_MESON)		+= meson_nand.o
 
 nand-objs := nand_base.o nand_legacy.o nand_bbt.o nand_timings.o nand_ids.o
 nand-objs += nand_onfi.o
diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
new file mode 100644
index 0000000..e196c0d
--- /dev/null
+++ b/drivers/mtd/nand/raw/meson_nand.c
@@ -0,0 +1,1360 @@ 
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Amlogic Meson Nand Flash Controller Driver
+ *
+ * Copyright (c) 2018 Amlogic, inc.
+ * Author: Liang Yang <liang.yang@amlogic.com>
+ */
+
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/mtd/rawnand.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#define NFC_REG_CMD		0x00
+#define NFC_CMD_DRD		(0x8 << 14)
+#define NFC_CMD_IDLE		(0xc << 14)
+#define NFC_CMD_DWR		(0x4 << 14)
+#define NFC_CMD_CLE		(0x5 << 14)
+#define NFC_CMD_ALE		(0x6 << 14)
+#define NFC_CMD_ADL		((0 << 16) | (3 << 20))
+#define NFC_CMD_ADH		((1 << 16) | (3 << 20))
+#define NFC_CMD_AIL		((2 << 16) | (3 << 20))
+#define NFC_CMD_AIH		((3 << 16) | (3 << 20))
+#define NFC_CMD_SEED		((8 << 16) | (3 << 20))
+#define NFC_CMD_M2N		((0 << 17) | (2 << 20))
+#define NFC_CMD_N2M		((1 << 17) | (2 << 20))
+#define NFC_CMD_RB		BIT(20)
+#define NFC_CMD_IO6		((0xb << 10) | (1 << 18))
+#define NFC_CMD_SCRAMBLER_ENABLE	BIT(19)
+#define NFC_CMD_RB_INT		BIT(14)
+
+#define NFC_CMD_GET_SIZE(x)	(((x) >> 22) & GENMASK(4, 0))
+#define NFC_CMD_RB_TIMEOUT	0x18
+
+#define NFC_REG_CFG		0x04
+#define NFC_REG_DADR		0x08
+#define NFC_REG_IADR		0x0c
+#define NFC_REG_BUF		0x10
+#define NFC_REG_INFO		0x14
+#define NFC_REG_DC		0x18
+#define NFC_REG_ADR		0x1c
+#define NFC_REG_DL		0x20
+#define NFC_REG_DH		0x24
+#define NFC_REG_CADR		0x28
+#define NFC_REG_SADR		0x2c
+#define NFC_REG_PINS		0x30
+#define NFC_REG_VER		0x38
+
+#define NFC_RB_IRQ_EN		BIT(21)
+#define NFC_INT_MASK		(3 << 20)
+
+#define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages)	\
+	(								\
+		(cmd_dir)			|			\
+		((ran) << 19)			|			\
+		((bch) << 14)			|			\
+		((short_mode) << 13)		|			\
+		(((page_size) & 0x7f) << 6)	|			\
+		((pages) & 0x3f)					\
+	)
+
+#define GENCMDDADDRL(adl, addr)		((adl) | ((addr) & 0xffff))
+#define GENCMDDADDRH(adh, addr)		((adh) | (((addr) >> 16) & 0xffff))
+#define GENCMDIADDRL(ail, addr)		((ail) | ((addr) & 0xffff))
+#define GENCMDIADDRH(aih, addr)		((aih) | (((addr) >> 16) & 0xffff))
+
+#define RB_STA(x)		(1 << (26 + (x)))
+#define DMA_DIR(dir)		((dir) ? NFC_CMD_N2M : NFC_CMD_M2N)
+
+#define ECC_CHECK_RETURN_FF	(-1)
+
+#define NAND_CE0		(0xe << 10)
+#define NAND_CE1		(0xd << 10)
+
+#define DMA_BUSY_TIMEOUT	0x100000
+#define CMD_FIFO_EMPTY_TIMEOUT	1000
+
+#define MAX_CE_NUM		2
+
+/* eMMC clock register, misc control */
+#define SD_EMMC_CLOCK		0x00
+#define CLK_ALWAYS_ON		BIT(28)
+#define CLK_SELECT_NAND		BIT(31)
+#define CLK_DIV_MASK		GENMASK(5, 0)
+
+#define NFC_CLK_CYCLE		6
+
+/* nand flash controller delay 3 ns */
+#define NFC_DEFAULT_DELAY	3000
+
+#define MAX_ECC_INDEX		10
+
+#define MUX_CLK_NUM_PARENTS	2
+
+#define ROW_ADDER(page, index)	(((page) >> (8 * (index))) & 0xff)
+#define MAX_CYCLE_ROW_ADDRS	3
+#define MAX_CYCLE_COLUMN_ADDRS	2
+#define DIRREAD			1
+#define DIRWRITE		0
+
+#define ECC_PARITY_BCH8_512B	14
+
+#define PER_INFO_BYTE		8
+#define ECC_GET_PROTECTED_OOB_BYTE(x, y)			\
+		((x) >> (8 * (y)) & GENMASK(7, 0))
+
+#define ECC_SET_PROTECTED_OOB_BYTE(x, y, z)			\
+	{							\
+		(x) &= (~((__le64)(0xff) << (8 * (y))));	\
+		(x) |= ((__le64)(z) << (8 * (y)));		\
+	}
+
+#define ECC_COMPLETE            BIT(31)
+#define ECC_ERR_CNT(x)		(((x) >> 24) & GENMASK(5, 0))
+#define ECC_ZERO_CNT(x)		(((x) >> 16) & GENMASK(5, 0))
+
+struct meson_nfc_nand_chip {
+	struct list_head node;
+	struct nand_chip nand;
+	int clk_rate;
+	int level1_divider;
+	int bus_timing;
+	int twb;
+	int tadl;
+
+	int bch_mode;
+	u8 *data_buf;
+	__le64 *info_buf;
+	int nsels;
+	u8 sels[0];
+};
+
+struct meson_nand_ecc {
+	int bch;
+	int strength;
+};
+
+struct meson_nfc_data {
+	const struct nand_ecc_caps *ecc_caps;
+};
+
+struct meson_nfc_param {
+	int chip_select;
+	int rb_select;
+};
+
+struct nand_rw_cmd {
+	int cmd0;
+	int col[MAX_CYCLE_COLUMN_ADDRS];
+	int row[MAX_CYCLE_ROW_ADDRS];
+	int cmd1;
+};
+
+struct nand_timing {
+	int twb;
+	int tadl;
+};
+
+struct meson_nfc {
+	struct nand_controller controller;
+	struct clk *core_clk;
+	struct clk *device_clk;
+	struct clk *phase_tx;
+	struct clk *phase_rx;
+
+	int clk_rate;
+	int bus_timing;
+
+	struct device *dev;
+	void __iomem *reg_base;
+	struct regmap *reg_clk;
+	struct completion completion;
+	struct list_head chips;
+	const struct meson_nfc_data *data;
+	struct meson_nfc_param param;
+	struct nand_timing timing;
+	union {
+		int cmd[32];
+		struct nand_rw_cmd rw;
+	} cmdfifo;
+
+	dma_addr_t data_dma;
+	dma_addr_t info_dma;
+
+	unsigned long assigned_cs;
+};
+
+enum {
+	NFC_ECC_BCH8_1K		= 2,
+	NFC_ECC_BCH24_1K,
+	NFC_ECC_BCH30_1K,
+	NFC_ECC_BCH40_1K,
+	NFC_ECC_BCH50_1K,
+	NFC_ECC_BCH60_1K,
+};
+
+#define MESON_ECC_DATA(b, s)	{ .bch = (b),	.strength = (s)}
+
+static int meson_nand_calc_ecc_bytes(int step_size, int strength)
+{
+	int ecc_bytes;
+
+	if (step_size == 512 && strength == 8)
+		return ECC_PARITY_BCH8_512B;
+
+	ecc_bytes = DIV_ROUND_UP(strength * fls(step_size * 8), 8);
+	ecc_bytes = ALIGN(ecc_bytes, 2);
+
+	return ecc_bytes;
+}
+
+NAND_ECC_CAPS_SINGLE(meson_gxl_ecc_caps,
+		     meson_nand_calc_ecc_bytes, 1024, 8, 24, 30, 40, 50, 60);
+NAND_ECC_CAPS_SINGLE(meson_axg_ecc_caps,
+		     meson_nand_calc_ecc_bytes, 1024, 8);
+
+static inline
+struct meson_nfc_nand_chip *to_meson_nand(struct nand_chip *nand)
+{
+	return container_of(nand, struct meson_nfc_nand_chip, nand);
+}
+
+static void meson_nfc_select_chip(struct nand_chip *nand, int chip)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	struct meson_nfc *nfc = nand_get_controller_data(nand);
+	int ret, value;
+
+	if (chip < 0 || WARN_ON_ONCE(chip > MAX_CE_NUM))
+		return;
+
+	nfc->param.chip_select = meson_chip->sels[chip] ? NAND_CE1 : NAND_CE0;
+	nfc->param.rb_select = nfc->param.chip_select;
+	nfc->timing.twb = meson_chip->twb;
+	nfc->timing.tadl = meson_chip->tadl;
+
+	if (chip >= 0) {
+		if (nfc->clk_rate != meson_chip->clk_rate) {
+			ret = clk_set_rate(nfc->device_clk,
+					   meson_chip->clk_rate);
+			if (ret) {
+				dev_err(nfc->dev, "failed to set clock rate\n");
+				return;
+			}
+			nfc->clk_rate = meson_chip->clk_rate;
+		}
+		if (nfc->bus_timing != meson_chip->bus_timing) {
+			value = (NFC_CLK_CYCLE - 1)
+				| (meson_chip->bus_timing << 5);
+			writel(value, nfc->reg_base + NFC_REG_CFG);
+			writel((1 << 31), nfc->reg_base + NFC_REG_CMD);
+			nfc->bus_timing =  meson_chip->bus_timing;
+		}
+	}
+}
+
+static inline void meson_nfc_cmd_idle(struct meson_nfc *nfc, u32 time)
+{
+	writel(nfc->param.chip_select | NFC_CMD_IDLE | (time & 0x3ff),
+	       nfc->reg_base + NFC_REG_CMD);
+}
+
+static inline void meson_nfc_cmd_seed(struct meson_nfc *nfc, u32 seed)
+{
+	writel(NFC_CMD_SEED | (0xc2 + (seed & 0x7fff)),
+	       nfc->reg_base + NFC_REG_CMD);
+}
+
+static void meson_nfc_cmd_access(struct nand_chip *nand, int raw, bool dir)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	struct meson_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	u32 cmd, pagesize, pages;
+	int bch = meson_chip->bch_mode;
+	int len = mtd->writesize;
+	int scramble = (nand->options & NAND_NEED_SCRAMBLING) ? 1 : 0;
+
+	pagesize = nand->ecc.size;
+
+	if (raw) {
+		len = mtd->writesize + mtd->oobsize;
+		cmd = (len & 0x3fff) | NFC_CMD_SCRAMBLER_ENABLE | DMA_DIR(dir);
+		writel(cmd, nfc->reg_base + NFC_REG_CMD);
+		return;
+	}
+
+	pages = len / nand->ecc.size;
+
+	cmd = CMDRWGEN(DMA_DIR(dir), scramble, bch, 0, pagesize, pages);
+
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+}
+
+static inline void meson_nfc_drain_cmd(struct meson_nfc *nfc)
+{
+	/*
+	 * Insert two commands to make sure all valid commands are finished.
+	 *
+	 * The Nand flash controller is designed as two stages pipleline -
+	 *  a) fetch and b) excute.
+	 * There might be cases when the driver see command queue is empty,
+	 * but the Nand flash controller still has two commands buffered,
+	 * one is fetched into NFC request queue (ready to run), and another
+	 * is actively executing. So pushing 2 "IDLE" commands guarantees that
+	 * the pipeline is emptied.
+	 */
+	meson_nfc_cmd_idle(nfc, 0);
+	meson_nfc_cmd_idle(nfc, 0);
+}
+
+static int meson_nfc_wait_cmd_finish(struct meson_nfc *nfc,
+				     unsigned int timeout_ms)
+{
+	u32 cmd_size = 0;
+	int ret;
+
+	/* wait cmd fifo is empty */
+	ret = readl_poll_timeout(nfc->reg_base + NFC_REG_CMD, cmd_size,
+				 !NFC_CMD_GET_SIZE(cmd_size),
+				 10, timeout_ms * 1000);
+	if (ret)
+		dev_err(nfc->dev, "wait for empty cmd FIFO time out\n");
+
+	return ret;
+}
+
+static int meson_nfc_wait_dma_finish(struct meson_nfc *nfc)
+{
+	meson_nfc_drain_cmd(nfc);
+
+	return meson_nfc_wait_cmd_finish(nfc, DMA_BUSY_TIMEOUT);
+}
+
+static inline __le64 nfc_info_ptr(struct nand_chip *nand, int index)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+
+	return le64_to_cpu(meson_chip->info_buf[index]);
+}
+
+static u8 *meson_nfc_oob_ptr(struct nand_chip *nand, int i)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	int len;
+
+	len = nand->ecc.size * (i + 1) + (nand->ecc.bytes + 2) * i;
+
+	return meson_chip->data_buf + len;
+}
+
+static u8 *meson_nfc_data_ptr(struct nand_chip *nand, int i)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	int len;
+	int temp;
+
+	temp = nand->ecc.size + nand->ecc.bytes;
+	len = (temp + 2) * i;
+
+	return meson_chip->data_buf + len;
+}
+
+static void meson_nfc_get_data_oob(struct nand_chip *nand,
+				   u8 *buf, u8 *oobbuf)
+{
+	int i, oob_len = 0;
+	u8 *dsrc, *osrc;
+
+	oob_len = nand->ecc.bytes + 2;
+	for (i = 0; i < nand->ecc.steps; i++) {
+		if (buf) {
+			dsrc = meson_nfc_data_ptr(nand, i);
+			memcpy(buf, dsrc, nand->ecc.size);
+			buf += nand->ecc.size;
+		}
+		osrc = meson_nfc_oob_ptr(nand, i);
+		memcpy(oobbuf, osrc, oob_len);
+		oobbuf += oob_len;
+	}
+}
+
+static void meson_nfc_set_data_oob(struct nand_chip *nand,
+				   const u8 *buf, u8 *oobbuf)
+{
+	int i, oob_len = 0;
+	u8 *dsrc, *osrc;
+
+	oob_len = nand->ecc.bytes + 2;
+	for (i = 0; i < nand->ecc.steps; i++) {
+		if (buf) {
+			dsrc = meson_nfc_data_ptr(nand, i);
+			memcpy(dsrc, buf, nand->ecc.size);
+			buf += nand->ecc.size;
+		}
+		osrc = meson_nfc_oob_ptr(nand, i);
+		memcpy(osrc, oobbuf, oob_len);
+		oobbuf += oob_len;
+	}
+}
+
+static int meson_nfc_queue_rb(struct meson_nfc *nfc, int timeout_ms)
+{
+	u32 cmd, cfg;
+	int ret = 0;
+
+	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
+	meson_nfc_drain_cmd(nfc);
+	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);
+
+	cfg = readl(nfc->reg_base + NFC_REG_CFG);
+	cfg |= (1 << 21);
+	writel(cfg, nfc->reg_base + NFC_REG_CFG);
+
+	init_completion(&nfc->completion);
+
+	cmd = NFC_CMD_RB | NFC_CMD_RB_INT
+		| nfc->param.chip_select | NFC_CMD_RB_TIMEOUT;
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	ret = wait_for_completion_timeout(&nfc->completion,
+					  msecs_to_jiffies(timeout_ms));
+	if (ret == 0) {
+		dev_err(nfc->dev, "wait nand irq timeout\n");
+		ret = -1;
+	}
+	return ret;
+}
+
+static void meson_nfc_set_user_byte(struct nand_chip *nand, u8 *oob_buf)
+{
+	__le64 info;
+	int i, count;
+
+	for (i = 0, count = 0; i < nand->ecc.steps; i++, count += 2) {
+		info = nfc_info_ptr(nand, i);
+		ECC_SET_PROTECTED_OOB_BYTE(info, 0, oob_buf[count]);
+		ECC_SET_PROTECTED_OOB_BYTE(info, 1, oob_buf[count + 1]);
+	}
+}
+
+static void meson_nfc_get_user_byte(struct nand_chip *nand, u8 *oob_buf)
+{
+	__le64 info;
+	int i, count;
+
+	for (i = 0, count = 0; i < nand->ecc.steps; i++, count += 2) {
+		info = nfc_info_ptr(nand, i);
+		oob_buf[count] = ECC_GET_PROTECTED_OOB_BYTE(info, 0);
+		oob_buf[count + 1] = ECC_GET_PROTECTED_OOB_BYTE(info, 1);
+	}
+}
+
+static int meson_nfc_ecc_correct(struct nand_chip *nand)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	__le64 info;
+	u32 bitflips = 0, i;
+	int scramble;
+	u8 zero_cnt;
+
+	scramble = (nand->options & NAND_NEED_SCRAMBLING) ? 1 : 0;
+
+	for (i = 0; i < nand->ecc.steps; i++) {
+		info = nfc_info_ptr(nand, i);
+		if (ECC_ERR_CNT(info) == 0x3f) {
+			zero_cnt = ECC_ZERO_CNT(info);
+			if (scramble && zero_cnt < nand->ecc.strength)
+				return ECC_CHECK_RETURN_FF;
+			mtd->ecc_stats.failed++;
+			continue;
+		}
+		mtd->ecc_stats.corrected += ECC_ERR_CNT(info);
+		bitflips = max_t(u32, bitflips, ECC_ERR_CNT(info));
+	}
+
+	return bitflips;
+}
+
+static inline u8 meson_nfc_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct meson_nfc *nfc = nand_get_controller_data(nand);
+	u32 cmd;
+
+	cmd = nfc->param.chip_select | NFC_CMD_DRD | 0;
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	meson_nfc_drain_cmd(nfc);
+
+	meson_nfc_wait_cmd_finish(nfc, 1000);
+
+	return readb(nfc->reg_base + NFC_REG_BUF);
+}
+
+static void meson_nfc_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		buf[i] = meson_nfc_read_byte(mtd);
+}
+
+static void meson_nfc_write_byte(struct mtd_info *mtd, u8 byte)
+{
+	struct meson_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	u32 cmd;
+
+	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
+
+	cmd = nfc->param.chip_select | NFC_CMD_DWR | (byte & 0xff);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	meson_nfc_cmd_idle(nfc, nfc->timing.twb);
+	meson_nfc_cmd_idle(nfc, 0);
+
+	meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);
+}
+
+static void meson_nfc_write_buf(struct mtd_info *mtd,
+				const u8 *buf, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		meson_nfc_write_byte(mtd, buf[i]);
+}
+
+static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
+						int page, bool in)
+{
+	struct meson_nfc *nfc = nand_get_controller_data(nand);
+	const struct nand_sdr_timings *sdr =
+		nand_get_sdr_timings(&nand->data_interface);
+	int cs = nfc->param.chip_select;
+	int i, cmd0, cmd_num;
+	int ret = 0;
+
+	cmd0 = in ? NAND_CMD_READ0 : NAND_CMD_SEQIN;
+	cmd_num = sizeof(struct nand_rw_cmd) / sizeof(int);
+	if (!in)
+		cmd_num--;
+
+	nfc->cmdfifo.rw.cmd0 = cs | NFC_CMD_CLE | cmd0;
+	for (i = 0; i < MAX_CYCLE_COLUMN_ADDRS; i++)
+		nfc->cmdfifo.rw.col[i] = cs | NFC_CMD_ALE | 0;
+
+	for (i = 0; i < MAX_CYCLE_ROW_ADDRS; i++)
+		nfc->cmdfifo.rw.row[i] = cs | NFC_CMD_ALE | ROW_ADDER(page, i);
+
+	nfc->cmdfifo.rw.cmd1 = cs | NFC_CMD_CLE | NAND_CMD_READSTART;
+
+	for (i = 0; i < cmd_num; i++)
+		writel(nfc->cmdfifo.cmd[i], nfc->reg_base + NFC_REG_CMD);
+
+	if (in)
+		meson_nfc_queue_rb(nfc, sdr->tR_max);
+	else
+		meson_nfc_cmd_idle(nfc, nfc->timing.tadl);
+
+	return ret;
+}
+
+static int meson_nfc_write_page_sub(struct nand_chip *nand, const u8 *buf,
+				    int page, int raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	const struct nand_sdr_timings *sdr =
+		nand_get_sdr_timings(&nand->data_interface);
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	struct meson_nfc *nfc = nand_get_controller_data(nand);
+	dma_addr_t daddr, iaddr;
+	u32 cmd;
+	int ret;
+
+	daddr = dma_map_single(nfc->dev, (void *)meson_chip->data_buf,
+			       mtd->writesize + mtd->oobsize,
+			       DMA_TO_DEVICE);
+	ret = dma_mapping_error(nfc->dev, daddr);
+	if (ret) {
+		dev_err(nfc->dev, "dma mapping error\n");
+		goto err;
+	}
+
+	iaddr = dma_map_single(nfc->dev, (void *)meson_chip->info_buf,
+			       nand->ecc.steps * PER_INFO_BYTE,
+			       DMA_TO_DEVICE);
+	ret = dma_mapping_error(nfc->dev, iaddr);
+	if (ret) {
+		dev_err(nfc->dev, "dma mapping error\n");
+		goto err_map_daddr;
+	}
+
+	ret = meson_nfc_rw_cmd_prepare_and_execute(nand, page, DIRWRITE);
+	if (ret)
+		goto err_map_iaddr;
+
+	cmd = GENCMDDADDRL(NFC_CMD_ADL, daddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	cmd = GENCMDDADDRH(NFC_CMD_ADH, daddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	cmd = GENCMDIADDRL(NFC_CMD_AIL, iaddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	cmd = GENCMDIADDRH(NFC_CMD_AIH, iaddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	meson_nfc_cmd_seed(nfc, page);
+
+	meson_nfc_cmd_access(nand, raw, DIRWRITE);
+
+	ret = meson_nfc_wait_dma_finish(nfc);
+	cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_PAGEPROG;
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+	meson_nfc_queue_rb(nfc, sdr->tPROG_max);
+
+err_map_iaddr:
+	dma_unmap_single(nfc->dev, iaddr,
+			 nand->ecc.steps * PER_INFO_BYTE, DMA_TO_DEVICE);
+err_map_daddr:
+	dma_unmap_single(nfc->dev, daddr,
+			 mtd->writesize + mtd->oobsize, DMA_TO_DEVICE);
+err:
+	return ret;
+}
+
+static int meson_nfc_write_page_raw(struct nand_chip *nand, const u8 *buf,
+				    int oob_required, int page)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	u8 *oob_buf = nand->oob_poi;
+
+	meson_nfc_set_data_oob(nand, buf, oob_buf);
+
+	return meson_nfc_write_page_sub(nand, meson_chip->data_buf, page, 1);
+}
+
+static int meson_nfc_write_page_hwecc(struct nand_chip *nand,
+				      const u8 *buf, int oob_required, int page)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	u8 *oob_buf = nand->oob_poi;
+
+	memcpy(meson_chip->data_buf, buf, mtd->writesize);
+	meson_nfc_set_user_byte(nand, oob_buf);
+
+	return meson_nfc_write_page_sub(nand, meson_chip->data_buf, page, 0);
+}
+
+static void meson_nfc_check_ecc_pages_valid(struct meson_nfc *nfc,
+					    struct nand_chip *nand, int raw)
+{
+	__le64 info;
+	int neccpages, i;
+
+	neccpages = raw ? 1 : nand->ecc.steps;
+
+	for (i = 0; i < neccpages; i++) {
+		info = nfc_info_ptr(nand, neccpages - 1);
+		if (!(info & ECC_COMPLETE))
+			dev_err(nfc->dev, "seems eccpage is invalid\n");
+	}
+}
+
+static int meson_nfc_read_page_sub(struct nand_chip *nand, const u8 *buf,
+				   int page, int raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	struct meson_nfc *nfc = nand_get_controller_data(nand);
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	dma_addr_t daddr, iaddr;
+	u32 cmd;
+	int ret;
+
+	daddr = dma_map_single(nfc->dev, (void *)meson_chip->data_buf,
+			       mtd->writesize + mtd->oobsize, DMA_FROM_DEVICE);
+	ret = dma_mapping_error(nfc->dev, daddr);
+	if (ret) {
+		dev_err(nfc->dev, "dma mapping error\n");
+		goto err;
+	}
+
+	iaddr = dma_map_single(nfc->dev, (void *)meson_chip->info_buf,
+			       nand->ecc.steps * PER_INFO_BYTE,
+			       DMA_FROM_DEVICE);
+	ret = dma_mapping_error(nfc->dev, iaddr);
+	if (ret) {
+		dev_err(nfc->dev, "dma mapping error\n");
+		goto err_map_daddr;
+	}
+
+	ret = meson_nfc_rw_cmd_prepare_and_execute(nand, page, DIRREAD);
+	if (ret)
+		goto err_map_iaddr;
+
+	cmd = GENCMDDADDRL(NFC_CMD_ADL, daddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	cmd = GENCMDDADDRH(NFC_CMD_ADH, daddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	cmd = GENCMDIADDRL(NFC_CMD_AIL, iaddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+	cmd = GENCMDIADDRH(NFC_CMD_AIH, iaddr);
+	writel(cmd, nfc->reg_base + NFC_REG_CMD);
+	meson_nfc_cmd_seed(nfc, page);
+	meson_nfc_cmd_access(nand, raw, DIRREAD);
+	ret = meson_nfc_wait_dma_finish(nfc);
+	meson_nfc_check_ecc_pages_valid(nfc, nand, raw);
+
+err_map_iaddr:
+	dma_unmap_single(nfc->dev, iaddr,
+			 nand->ecc.steps * PER_INFO_BYTE, DMA_FROM_DEVICE);
+err_map_daddr:
+	dma_unmap_single(nfc->dev, daddr,
+			 mtd->writesize + mtd->oobsize, DMA_FROM_DEVICE);
+err:
+
+	return ret;
+}
+
+static int meson_nfc_read_page_raw(struct nand_chip *nand, u8 *buf,
+				   int oob_required, int page)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	u8 *oob_buf = nand->oob_poi;
+	int ret;
+
+	ret = meson_nfc_read_page_sub(nand, meson_chip->data_buf, page, 1);
+	if (ret)
+		return ret;
+
+	meson_nfc_get_data_oob(nand, buf, oob_buf);
+
+	return 0;
+}
+
+static int meson_nfc_read_page_hwecc(struct nand_chip *nand, u8 *buf,
+				     int oob_required, int page)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	u8 *oob_buf = nand->oob_poi;
+	int ret;
+
+	ret = meson_nfc_read_page_sub(nand, meson_chip->data_buf, page, 0);
+	if (ret)
+		return ret;
+
+	meson_nfc_get_user_byte(nand, oob_buf);
+
+	ret = meson_nfc_ecc_correct(nand);
+	if (ret == ECC_CHECK_RETURN_FF) {
+		if (buf)
+			memset(buf, 0xff, mtd->writesize);
+
+		memset(oob_buf, 0xff, mtd->oobsize);
+		return 0;
+	}
+
+	if (buf && buf != meson_chip->data_buf)
+		memcpy(buf, meson_chip->data_buf, mtd->writesize);
+
+	return ret;
+}
+
+static int meson_nfc_read_oob_raw(struct nand_chip *nand, int page)
+{
+	return meson_nfc_read_page_raw(nand, NULL, 1, page);
+}
+
+static int meson_nfc_read_oob(struct nand_chip *nand, int page)
+{
+	return meson_nfc_read_page_hwecc(nand, NULL, 1, page);
+}
+
+static int meson_nfc_exec_op(struct nand_chip *nand,
+			     const struct nand_operation *op, bool check_only)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	struct meson_nfc *nfc = nand_get_controller_data(nand);
+	const struct nand_op_instr *instr = NULL;
+	int ret = 0, cmd;
+	unsigned int op_id;
+	int i, delay_idle;
+
+	for (op_id = 0; op_id < op->ninstrs; op_id++) {
+		instr = &op->instrs[op_id];
+		delay_idle = DIV_ROUND_UP(PSEC_TO_NSEC(instr->delay_ns),
+					  meson_chip->level1_divider
+					  * NFC_CLK_CYCLE);
+		switch (instr->type) {
+		case NAND_OP_CMD_INSTR:
+			cmd = nfc->param.chip_select | NFC_CMD_CLE;
+			cmd |= instr->ctx.cmd.opcode & 0xff;
+			writel(cmd, nfc->reg_base + NFC_REG_CMD);
+			meson_nfc_cmd_idle(nfc, delay_idle);
+			break;
+
+		case NAND_OP_ADDR_INSTR:
+			for (i = 0; i < instr->ctx.addr.naddrs; i++) {
+				cmd = nfc->param.chip_select | NFC_CMD_ALE;
+				cmd |= instr->ctx.addr.addrs[i] & 0xff;
+				writel(cmd, nfc->reg_base + NFC_REG_CMD);
+			}
+			meson_nfc_cmd_idle(nfc, delay_idle);
+			break;
+
+		case NAND_OP_DATA_IN_INSTR:
+			meson_nfc_read_buf(mtd, instr->ctx.data.buf.in,
+					   instr->ctx.data.len);
+			break;
+
+		case NAND_OP_DATA_OUT_INSTR:
+			meson_nfc_write_buf(mtd, instr->ctx.data.buf.out,
+					    instr->ctx.data.len);
+			break;
+
+		case NAND_OP_WAITRDY_INSTR:
+			meson_nfc_queue_rb(nfc, instr->ctx.waitrdy.timeout_ms);
+			meson_nfc_cmd_idle(nfc, delay_idle);
+			break;
+		}
+	}
+	return ret;
+}
+
+static int meson_ooblayout_ecc(struct mtd_info *mtd, int section,
+			       struct mtd_oob_region *oobregion)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+
+	if (section >= nand->ecc.steps)
+		return -ERANGE;
+
+	oobregion->offset =  2 + (section * (2 + nand->ecc.bytes));
+	oobregion->length = nand->ecc.bytes;
+
+	return 0;
+}
+
+static int meson_ooblayout_free(struct mtd_info *mtd, int section,
+				struct mtd_oob_region *oobregion)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+
+	if (section >= nand->ecc.steps)
+		return -ERANGE;
+
+	oobregion->offset = section * (2 + nand->ecc.bytes);
+	oobregion->length = 2;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops meson_ooblayout_ops = {
+	.ecc = meson_ooblayout_ecc,
+	.free = meson_ooblayout_free,
+};
+
+static int meson_nfc_clk_init(struct meson_nfc *nfc)
+{
+	int ret, default_clk_rate;
+
+	/* request core clock */
+	nfc->core_clk = devm_clk_get(nfc->dev, "core");
+	if (IS_ERR(nfc->core_clk)) {
+		dev_err(nfc->dev, "failed to get core clk\n");
+		return PTR_ERR(nfc->core_clk);
+	}
+
+	nfc->device_clk = devm_clk_get(nfc->dev, "device");
+	if (IS_ERR(nfc->device_clk)) {
+		dev_err(nfc->dev, "failed to get device clk\n");
+		return PTR_ERR(nfc->device_clk);
+	}
+
+	nfc->phase_tx = devm_clk_get(nfc->dev, "tx");
+	if (IS_ERR(nfc->phase_tx)) {
+		dev_err(nfc->dev, "failed to get tx clk\n");
+		return PTR_ERR(nfc->phase_tx);
+	}
+
+	nfc->phase_rx = devm_clk_get(nfc->dev, "rx");
+	if (IS_ERR(nfc->phase_rx)) {
+		dev_err(nfc->dev, "failed to get rx clk\n");
+		return PTR_ERR(nfc->phase_rx);
+	}
+
+	/* init SD_EMMC_CLOCK to sane defaults w/min clock rate */
+	regmap_update_bits(nfc->reg_clk,
+			   0, CLK_SELECT_NAND, CLK_SELECT_NAND);
+
+	ret = clk_prepare_enable(nfc->core_clk);
+	if (ret) {
+		dev_err(nfc->dev, "failed to enable core clk\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(nfc->device_clk);
+	if (ret) {
+		dev_err(nfc->dev, "failed to enable device clk\n");
+		clk_disable_unprepare(nfc->core_clk);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(nfc->phase_tx);
+	if (ret) {
+		dev_err(nfc->dev, "failed to enable tx clk\n");
+		clk_disable_unprepare(nfc->core_clk);
+		clk_disable_unprepare(nfc->device_clk);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(nfc->phase_rx);
+	if (ret) {
+		dev_err(nfc->dev, "failed to enable rx clk\n");
+		clk_disable_unprepare(nfc->core_clk);
+		clk_disable_unprepare(nfc->device_clk);
+		clk_disable_unprepare(nfc->phase_tx);
+		return ret;
+	}
+
+	default_clk_rate = clk_round_rate(nfc->device_clk, 24000000);
+	ret = clk_set_rate(nfc->device_clk, default_clk_rate);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void meson_nfc_disable_clk(struct meson_nfc *nfc)
+{
+	clk_disable_unprepare(nfc->phase_rx);
+	clk_disable_unprepare(nfc->phase_tx);
+	clk_disable_unprepare(nfc->device_clk);
+	clk_disable_unprepare(nfc->core_clk);
+}
+
+static void meson_nfc_free_buffer(struct nand_chip *nand)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+
+	kfree(meson_chip->info_buf);
+	kfree(meson_chip->data_buf);
+}
+
+static int meson_chip_buffer_init(struct nand_chip *nand)
+{
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	int page_bytes, info_bytes;
+	int nsectors;
+
+	nsectors = mtd->writesize / nand->ecc.size;
+
+	page_bytes =  mtd->writesize + mtd->oobsize;
+	info_bytes = nsectors * PER_INFO_BYTE;
+
+	meson_chip->data_buf = kmalloc(page_bytes, GFP_KERNEL);
+	if (!meson_chip->data_buf)
+		return -ENOMEM;
+
+	meson_chip->info_buf = kmalloc(info_bytes, GFP_KERNEL);
+	if (!meson_chip->info_buf) {
+		kfree(meson_chip->data_buf);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static
+int meson_nfc_setup_data_interface(struct nand_chip *nand, int csline,
+				   const struct nand_data_interface *conf)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	const struct nand_sdr_timings *timings;
+	int div, bt_min, bt_max;
+
+	timings = nand_get_sdr_timings(conf);
+	if (IS_ERR(timings))
+		return -ENOTSUPP;
+
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	div = DIV_ROUND_UP((timings->tRC_min / 1000), NFC_CLK_CYCLE);
+	bt_min = (timings->tREA_max + NFC_DEFAULT_DELAY) / div;
+	bt_max = (NFC_DEFAULT_DELAY + timings->tRHOH_min
+				    + timings->tRC_min / 2) / div;
+
+	meson_chip->twb = DIV_ROUND_UP(PSEC_TO_NSEC(timings->tWB_max),
+				       div * NFC_CLK_CYCLE);
+	meson_chip->tadl = DIV_ROUND_UP(PSEC_TO_NSEC(timings->tADL_min),
+					div * NFC_CLK_CYCLE);
+
+	bt_min = DIV_ROUND_UP(bt_min, 1000);
+	bt_max = DIV_ROUND_UP(bt_max, 1000);
+
+	if (bt_max < bt_min)
+		return -EINVAL;
+
+	meson_chip->level1_divider = div;
+	meson_chip->clk_rate = 1000000000 / meson_chip->level1_divider;
+	meson_chip->bus_timing = (bt_min + bt_max) / 2 + 1;
+
+	return 0;
+}
+
+static int meson_nand_bch_mode(struct nand_chip *nand)
+{
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	struct meson_nand_ecc meson_ecc[] = {
+		MESON_ECC_DATA(NFC_ECC_BCH8_1K, 8),
+		MESON_ECC_DATA(NFC_ECC_BCH24_1K, 24),
+		MESON_ECC_DATA(NFC_ECC_BCH30_1K, 30),
+		MESON_ECC_DATA(NFC_ECC_BCH40_1K, 40),
+		MESON_ECC_DATA(NFC_ECC_BCH50_1K, 50),
+		MESON_ECC_DATA(NFC_ECC_BCH60_1K, 60),
+	};
+	int i;
+
+	if (nand->ecc.strength > 60 || nand->ecc.strength < 8)
+		return -EINVAL;
+
+	for (i = 0; i < sizeof(meson_ecc); i++) {
+		if (meson_ecc[i].strength == nand->ecc.strength) {
+			meson_chip->bch_mode = meson_ecc[i].bch;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int meson_nand_attach_chip(struct nand_chip *nand)
+{
+	struct meson_nfc *nfc = nand_get_controller_data(nand);
+	struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+	struct mtd_info *mtd = nand_to_mtd(nand);
+	int nsectors = mtd->writesize / 1024;
+	int ret;
+
+	if (!mtd->name) {
+		mtd->name = devm_kasprintf(nfc->dev, GFP_KERNEL,
+					   "%s:nand%d",
+					   dev_name(nfc->dev),
+					   meson_chip->sels[0]);
+		if (!mtd->name)
+			return -ENOMEM;
+	}
+
+	if (nand->bbt_options & NAND_BBT_USE_FLASH)
+		nand->bbt_options |= NAND_BBT_NO_OOB;
+
+	nand->options |= NAND_NO_SUBPAGE_WRITE;
+
+	ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps,
+				   mtd->oobsize - 2 * nsectors);
+	if (ret) {
+		dev_err(nfc->dev, "failed to ecc init\n");
+		return -EINVAL;
+	}
+
+	ret = meson_nand_bch_mode(nand);
+	if (ret)
+		return -EINVAL;
+
+	nand->ecc.mode = NAND_ECC_HW;
+	nand->ecc.write_page_raw = meson_nfc_write_page_raw;
+	nand->ecc.write_page = meson_nfc_write_page_hwecc;
+	nand->ecc.write_oob_raw = nand_write_oob_std;
+	nand->ecc.write_oob = nand_write_oob_std;
+
+	nand->ecc.read_page_raw = meson_nfc_read_page_raw;
+	nand->ecc.read_page = meson_nfc_read_page_hwecc;
+	nand->ecc.read_oob_raw = meson_nfc_read_oob_raw;
+	nand->ecc.read_oob = meson_nfc_read_oob;
+
+	if (nand->options & NAND_BUSWIDTH_16) {
+		dev_err(nfc->dev, "16bits buswidth not supported");
+		return -EINVAL;
+	}
+	meson_chip_buffer_init(nand);
+	if (ret)
+		return -ENOMEM;
+
+	return ret;
+}
+
+static const struct nand_controller_ops meson_nand_controller_ops = {
+	.attach_chip = meson_nand_attach_chip,
+};
+
+static int
+meson_nfc_nand_chip_init(struct device *dev,
+			 struct meson_nfc *nfc, struct device_node *np)
+{
+	struct meson_nfc_nand_chip *meson_chip;
+	struct nand_chip *nand;
+	struct mtd_info *mtd;
+	int ret, nsels, i;
+	u32 tmp;
+
+	if (!of_get_property(np, "reg", &nsels))
+		return -EINVAL;
+
+	nsels /= sizeof(u32);
+	if (!nsels || nsels > MAX_CE_NUM) {
+		dev_err(dev, "invalid reg property size\n");
+		return -EINVAL;
+	}
+
+	meson_chip = devm_kzalloc(dev,
+				  sizeof(*meson_chip) + (nsels * sizeof(u8)),
+				  GFP_KERNEL);
+	if (!meson_chip)
+		return -ENOMEM;
+
+	meson_chip->nsels = nsels;
+
+	for (i = 0; i < nsels; i++) {
+		ret = of_property_read_u32_index(np, "reg", i, &tmp);
+		if (ret) {
+			dev_err(dev, "could not retrieve reg property: %d\n",
+				ret);
+			return ret;
+		}
+
+		if (test_and_set_bit(tmp, &nfc->assigned_cs)) {
+			dev_err(dev, "CS %d already assigned\n", tmp);
+			return -EINVAL;
+		}
+	}
+
+	nand = &meson_chip->nand;
+	nand->controller = &nfc->controller;
+	nand->controller->ops = &meson_nand_controller_ops;
+	nand_set_flash_node(nand, np);
+	nand_set_controller_data(nand, nfc);
+
+	nand->options |= NAND_USE_BOUNCE_BUFFER;
+	nand->select_chip = meson_nfc_select_chip;
+	nand->exec_op = meson_nfc_exec_op;
+	nand->setup_data_interface = meson_nfc_setup_data_interface;
+	mtd = nand_to_mtd(nand);
+	mtd->owner = THIS_MODULE;
+	mtd->dev.parent = dev;
+
+	ret = nand_scan(nand, nsels);
+	if (ret)
+		return ret;
+
+	ret = mtd_device_register(mtd, NULL, 0);
+	if (ret) {
+		dev_err(dev, "failed to register mtd device: %d\n", ret);
+		nand_cleanup(nand);
+		return ret;
+	}
+
+	list_add_tail(&meson_chip->node, &nfc->chips);
+
+	return 0;
+}
+
+static int meson_nfc_nand_chip_cleanup(struct meson_nfc *nfc)
+{
+	struct meson_nfc_nand_chip *meson_chip;
+	struct mtd_info *mtd;
+	int ret;
+
+	while (!list_empty(&nfc->chips)) {
+		meson_chip = list_first_entry(&nfc->chips,
+					      struct meson_nfc_nand_chip, node);
+		mtd = nand_to_mtd(&meson_chip->nand);
+		ret = mtd_device_unregister(mtd);
+		if (ret)
+			return ret;
+
+		meson_nfc_free_buffer(&meson_chip->nand);
+		nand_cleanup(&meson_chip->nand);
+		list_del(&meson_chip->node);
+	}
+
+	return 0;
+}
+
+static int meson_nfc_nand_chips_init(struct device *dev,
+				     struct meson_nfc *nfc)
+{
+	struct device_node *np = dev->of_node;
+	struct device_node *nand_np;
+	int ret;
+
+	for_each_child_of_node(np, nand_np) {
+		ret = meson_nfc_nand_chip_init(dev, nfc, nand_np);
+		if (ret) {
+			meson_nfc_nand_chip_cleanup(nfc);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t meson_nfc_irq(int irq, void *id)
+{
+	struct meson_nfc *nfc = id;
+	u32 cfg;
+
+	cfg = readl(nfc->reg_base + NFC_REG_CFG);
+	if (!(cfg & NFC_RB_IRQ_EN))
+		return IRQ_NONE;
+
+	cfg &= ~(NFC_RB_IRQ_EN);
+	writel(cfg, nfc->reg_base + NFC_REG_CFG);
+
+	complete(&nfc->completion);
+	return IRQ_HANDLED;
+}
+
+static const struct meson_nfc_data meson_gxl_data = {
+	.ecc_caps = &meson_gxl_ecc_caps,
+};
+
+static const struct meson_nfc_data meson_axg_data = {
+	.ecc_caps = &meson_axg_ecc_caps,
+};
+
+static const struct of_device_id meson_nfc_id_table[] = {
+	{
+		.compatible = "amlogic,meson-gxl-nfc",
+		.data = &meson_gxl_data,
+	}, {
+		.compatible = "amlogic,meson-axg-nfc",
+		.data = &meson_axg_data,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, meson_nfc_id_table);
+
+static int meson_nfc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct meson_nfc *nfc;
+	struct resource *res;
+	int ret, irq;
+
+	nfc = devm_kzalloc(dev, sizeof(*nfc), GFP_KERNEL);
+	if (!nfc)
+		return -ENOMEM;
+
+	nfc->data = of_device_get_match_data(&pdev->dev);
+	if (!nfc->data)
+		return -ENODEV;
+
+	nand_controller_init(&nfc->controller);
+	INIT_LIST_HEAD(&nfc->chips);
+
+	nfc->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nfc->reg_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(nfc->reg_base))
+		return PTR_ERR(nfc->reg_base);
+
+	nfc->reg_clk =
+		syscon_regmap_lookup_by_phandle(dev->of_node,
+						"amlogic,mmc-syscon");
+	if (IS_ERR(nfc->reg_clk)) {
+		dev_err(dev, "Failed to lookup clock base\n");
+		return PTR_ERR(nfc->reg_clk);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "no nfi irq resource\n");
+		return -EINVAL;
+	}
+
+	ret = meson_nfc_clk_init(nfc);
+	if (ret) {
+		dev_err(dev, "failed to initialize nand clk\n");
+		goto err;
+	}
+
+	writel(0, nfc->reg_base + NFC_REG_CFG);
+	ret = devm_request_irq(dev, irq, meson_nfc_irq, 0, dev_name(dev), nfc);
+	if (ret) {
+		dev_err(dev, "failed to request nfi irq\n");
+		ret = -EINVAL;
+		goto err_clk;
+	}
+
+	ret = dma_set_mask(dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(dev, "failed to set dma mask\n");
+		goto err_clk;
+	}
+
+	platform_set_drvdata(pdev, nfc);
+
+	ret = meson_nfc_nand_chips_init(dev, nfc);
+	if (ret) {
+		dev_err(dev, "failed to init nand chips\n");
+		goto err_clk;
+	}
+
+	return 0;
+
+err_clk:
+	meson_nfc_disable_clk(nfc);
+err:
+	return ret;
+}
+
+static int meson_nfc_remove(struct platform_device *pdev)
+{
+	struct meson_nfc *nfc = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = meson_nfc_nand_chip_cleanup(nfc);
+	if (ret)
+		return ret;
+
+	meson_nfc_disable_clk(nfc);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver meson_nfc_driver = {
+	.probe  = meson_nfc_probe,
+	.remove = meson_nfc_remove,
+	.driver = {
+		.name  = "meson-nand",
+		.of_match_table = meson_nfc_id_table,
+	},
+};
+module_platform_driver(meson_nfc_driver);
+
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_AUTHOR("Liang Yang <liang.yang@amlogic.com>");
+MODULE_DESCRIPTION("Amlogic's Meson NAND Flash Controller driver");