diff mbox

[1/4] mtd: nand: add NVIDIA Tegra NAND Flash controller driver

Message ID 1420403960-26626-1-git-send-email-dev@lynxeye.de
State New, archived
Headers show

Commit Message

Lucas Stach Jan. 4, 2015, 8:39 p.m. UTC
Add support for the NAND flash controller found on NVIDIA
Tegra 2/3 SoCs. This is a largely reworked version of the driver
started by Thierry.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Lucas Stach <dev@lynxeye.de>
---
I've tested this driver with the in-kernel mtd-tests and some
realworld workloads on a Colibri T20 module.
---
 .../bindings/mtd/nvidia,tegra20-nand.txt           |  30 +
 MAINTAINERS                                        |   6 +
 drivers/mtd/nand/Kconfig                           |   6 +
 drivers/mtd/nand/Makefile                          |   1 +
 drivers/mtd/nand/tegra_nand.c                      | 794 +++++++++++++++++++++
 5 files changed, 837 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
 create mode 100644 drivers/mtd/nand/tegra_nand.c

Comments

Stefan Agner Jan. 5, 2015, 11:41 p.m. UTC | #1
Hi Lucas,

Thanks for picking that up!

I did some short benchmarks on Colibri T20 V1.2, L4T. These are the
write/read speeds I measured on the YAFFS2-based file system:

# dd if=/dev/zero of=test bs=50M count=1 conv=fdatasync
1+0 records in
1+0 records out
52428800 bytes (52 MB) copied, 9.88293 s, 5.3 MB/s

echo 3 > /proc/sys/vm/drop_caches
# dd if=test of=/dev/zero bs=50M count=1
1+0 records in
1+0 records out
52428800 bytes (52 MB) copied, 5.97056 s, 8.8 MB/s

So your values look quite realistic then!

Some comments below...

On 2015-01-04 21:39, Lucas Stach wrote:
> Add support for the NAND flash controller found on NVIDIA
> Tegra 2/3 SoCs. This is a largely reworked version of the driver
> started by Thierry.
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> ---
> I've tested this driver with the in-kernel mtd-tests and some
> realworld workloads on a Colibri T20 module.
> ---
>  .../bindings/mtd/nvidia,tegra20-nand.txt           |  30 +
>  MAINTAINERS                                        |   6 +
>  drivers/mtd/nand/Kconfig                           |   6 +
>  drivers/mtd/nand/Makefile                          |   1 +
>  drivers/mtd/nand/tegra_nand.c                      | 794 +++++++++++++++++++++
>  5 files changed, 837 insertions(+)
>  create mode 100644
> Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
>  create mode 100644 drivers/mtd/nand/tegra_nand.c
> 
> diff --git
> a/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> new file mode 100644
> index 0000000..088223c
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> @@ -0,0 +1,30 @@
> +NVIDIA Tegra NAND Flash controller
> +
> +Required properties:
> +- compatible: Must be one of:
> +  - "nvidia,tegra20-nand"
> +  - "nvidia,tegra30-nand"
> +- reg: MMIO address range
> +- interrupts: interrupt output of the NFC controller
> +- clocks: Must contain an entry for each entry in clock-names.
> +  See ../clocks/clock-bindings.txt for details.
> +- clock-names: Must include the following entries:
> +  - nand
> +- resets: Must contain an entry for each entry in reset-names.
> +  See ../reset/reset.txt for details.
> +- reset-names: Must include the following entries:
> +  - nand
> +
> +Optional properties:
> +- nvidia,wp-gpios: GPIO used to disable write protection of the flash
> +
> +  Example:
> +	nand@70008000 {
> +		compatible = "nvidia,tegra20-nand";
> +		reg = <0x70008000 0x100>;
> +		interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
> +		clocks = <&tegra_car TEGRA20_CLK_NDFLASH>;
> +		clock-names = "nand";
> +		resets = <&tegra_car 13>;
> +		reset-names = "nand";
> +	};
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ddb9ac8..972e31d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9459,6 +9459,12 @@ M:	Laxman Dewangan <ldewangan@nvidia.com>
>  S:	Supported
>  F:	drivers/input/keyboard/tegra-kbc.c
>  
> +TEGRA NAND DRIVER
> +M:	Lucas Stach <dev@lynxeye.de>
> +S:	Maintained
> +F:	Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> +F:	drivers/mtd/nand/tegra_nand.c
> +
>  TEGRA PWM DRIVER
>  M:	Thierry Reding <thierry.reding@gmail.com>
>  S:	Supported
> diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
> index 7d0150d..1eafd4e 100644
> --- a/drivers/mtd/nand/Kconfig
> +++ b/drivers/mtd/nand/Kconfig
> @@ -524,4 +524,10 @@ config MTD_NAND_SUNXI
>  	help
>  	  Enables support for NAND Flash chips on Allwinner SoCs.
>  
> +config MTD_NAND_TEGRA
> +	tristate "Support for NAND on NVIDIA Tegra"
> +	depends on ARCH_TEGRA || COMPILE_TEST
> +	help
> +	  Enables support for NAND flash on NVIDIA Tegra SoC based boards.
> +
>  endif # MTD_NAND
> diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
> index bd38f21..58399ce 100644
> --- a/drivers/mtd/nand/Makefile
> +++ b/drivers/mtd/nand/Makefile
> @@ -51,5 +51,6 @@ obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
>  obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
>  obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
>  obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
> +obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
>  
>  nand-objs := nand_base.o nand_bbt.o nand_timings.o
> diff --git a/drivers/mtd/nand/tegra_nand.c b/drivers/mtd/nand/tegra_nand.c
> new file mode 100644
> index 0000000..b919a6e
> --- /dev/null
> +++ b/drivers/mtd/nand/tegra_nand.c
> @@ -0,0 +1,794 @@
> +/*
> + * Copyright (C) 2014-2015 Lucas Stach <dev@lynxeye.de>
> + * Copyright (C) 2012 Avionic Design GmbH
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/completion.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/err.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/module.h>
> +#include <linux/mtd/nand.h>
> +#include <linux/mtd/partitions.h>
> +#include <linux/of_gpio.h>
> +#include <linux/of_mtd.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/reset.h>
> +
> +#define CMD				0x00
> +#define   CMD_GO			(1 << 31)
> +#define   CMD_CLE			(1 << 30)
> +#define   CMD_ALE			(1 << 29)
> +#define   CMD_PIO			(1 << 28)
> +#define   CMD_TX			(1 << 27)
> +#define   CMD_RX			(1 << 26)
> +#define   CMD_SEC_CMD			(1 << 25)
> +#define   CMD_AFT_DAT			(1 << 24)
> +#define   CMD_TRANS_SIZE(x)		(((x) & 0xf) << 20)
> +#define   CMD_A_VALID			(1 << 19)
> +#define   CMD_B_VALID			(1 << 18)
> +#define   CMD_RD_STATUS_CHK		(1 << 17)
> +#define   CMD_RBSY_CHK			(1 << 16)
> +#define   CMD_CE(x)			(1 << (8 + ((x) & 0x7)))
> +#define   CMD_CLE_SIZE(x)		(((x) & 0x3) << 4)
> +#define   CMD_ALE_SIZE(x)		(((x) & 0xf) << 0)
> +
> +#define STATUS				0x04
> +
> +#define ISR				0x08
> +#define   ISR_UND			(1 << 7)
> +#define   ISR_OVR			(1 << 6)
> +#define   ISR_CMD_DONE			(1 << 5)
> +#define   ISR_ECC_ERR			(1 << 4)
> +
> +#define IER				0x0c
> +#define   IER_ERR_TRIG_VAL(x)		(((x) & 0xf) << 16)
> +#define   IER_UND			(1 << 7)
> +#define   IER_OVR			(1 << 6)
> +#define   IER_CMD_DONE			(1 << 5)
> +#define   IER_ECC_ERR			(1 << 4)
> +#define   IER_GIE			(1 << 0)
> +
> +#define CFG				0x10
> +#define   CFG_HW_ECC			(1 << 31)
> +#define   CFG_ECC_SEL			(1 << 30)
> +#define   CFG_ERR_COR			(1 << 29)
> +#define   CFG_PIPE_EN			(1 << 28)
> +#define   CFG_TVAL_4			(0 << 24)
> +#define   CFG_TVAL_6			(1 << 24)
> +#define   CFG_TVAL_8			(2 << 24)
> +#define   CFG_SKIP_SPARE		(1 << 23)
> +#define   CFG_BUS_WIDTH_8		(0 << 21)
> +#define   CFG_BUS_WIDTH_16		(1 << 21)
> +#define   CFG_COM_BSY			(1 << 20)
> +#define   CFG_PS_256			(0 << 16)
> +#define   CFG_PS_512			(1 << 16)
> +#define   CFG_PS_1024			(2 << 16)
> +#define   CFG_PS_2048			(3 << 16)
> +#define   CFG_PS_4096			(4 << 16)
> +#define   CFG_SKIP_SPARE_SIZE_4		(0 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_8		(1 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_12	(2 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_16	(3 << 14)
> +#define   CFG_TAG_BYTE_SIZE(x)		((x) & 0xff)
> +
> +#define TIMING_1			0x14
> +#define   TIMING_TRP_RESP(x)		(((x) & 0xf) << 28)
> +#define   TIMING_TWB(x)			(((x) & 0xf) << 24)
> +#define   TIMING_TCR_TAR_TRR(x)		(((x) & 0xf) << 20)
> +#define   TIMING_TWHR(x)		(((x) & 0xf) << 16)
> +#define   TIMING_TCS(x)			(((x) & 0xc) << 14)

In the Tegra 2 TRM v02p, this field occupies bits 15:14. You shift the
masked value by 14, hence the mask should be 0x3...

> +#define   TIMING_TWH(x)			(((x) & 0x3) << 12)
> +#define   TIMING_TWP(x)			(((x) & 0xf) <<  8)
> +#define   TIMING_TRH(x)			(((x) & 0xf) <<  4)
> +#define   TIMING_TRP(x)			(((x) & 0xf) <<  0)
> +
> +#define RESP				0x18
> +
> +#define TIMING_2			0x1c
> +#define   TIMING_TADL(x)		((x) & 0xf)
> +
> +#define CMD_1				0x20
> +#define CMD_2				0x24
> +#define ADDR_1				0x28
> +#define ADDR_2				0x2c
> +
> +#define DMA_CTRL			0x30
> +#define   DMA_CTRL_GO			(1 << 31)
> +#define   DMA_CTRL_IN			(0 << 30)
> +#define   DMA_CTRL_OUT			(1 << 30)
> +#define   DMA_CTRL_PERF_EN		(1 << 29)
> +#define   DMA_CTRL_IE_DONE		(1 << 28)
> +#define   DMA_CTRL_REUSE		(1 << 27)
> +#define   DMA_CTRL_BURST_1		(2 << 24)
> +#define   DMA_CTRL_BURST_4		(3 << 24)
> +#define   DMA_CTRL_BURST_8		(4 << 24)
> +#define   DMA_CTRL_BURST_16		(5 << 24)
> +#define   DMA_CTRL_IS_DONE		(1 << 20)
> +#define   DMA_CTRL_EN_A			(1 <<  2)
> +#define   DMA_CTRL_EN_B			(1 <<  1)
> +
> +#define DMA_CFG_A			0x34
> +#define DMA_CFG_B			0x38
> +
> +#define FIFO_CTRL			0x3c
> +#define   FIFO_CTRL_CLR_ALL		(1 << 3)
> +
> +#define DATA_PTR			0x40
> +#define TAG_PTR				0x44
> +#define ECC_PTR				0x48
> +
> +#define HWSTATUS_CMD			0x50
> +#define HWSTATUS_MASK			0x54
> +#define   HWSTATUS_RDSTATUS_MASK(x)	(((x) & 0xff) << 24)
> +#define   HWSTATUS_RDSTATUS_VALUE(x)	(((x) & 0xff) << 16)
> +#define   HWSTATUS_RBSY_MASK(x)		(((x) & 0xff) << 8)
> +#define   HWSTATUS_RBSY_VALUE(x)	(((x) & 0xff) << 0)
> +
> +#define DEC_RESULT			0xd0
> +#define   DEC_RESULT_CORRFAIL		(1 << 8)
> +
> +#define DEC_STATUS_BUF			0xd4
> +#define   DEC_STATUS_BUF_FAIL_SEC_FLAG(x)	((x) & (0xff << 24))
> +#define   DEC_STATUS_BUF_CORR_SEC_FLAG(x)	((x) & (0xff << 16))
> +#define   DEC_STATUS_BUF_MAX_CORR_CNT(x)	(((x) & 0xf00) >> 8)
> +
> +struct tegra_nand {
> +	void __iomem *regs;
> +	int irq;
> +	struct clk *clk;
> +	struct reset_control *rst;
> +	int wp_gpio;
> +	int buswidth;
> +
> +	struct nand_chip chip;
> +	struct mtd_info mtd;
> +	struct device *dev;
> +
> +	struct completion command_complete;
> +	struct completion dma_complete;
> +
> +	dma_addr_t data_dma;
> +	void *data_buf;
> +	dma_addr_t oob_dma;
> +	void *oob_buf;
> +
> +	int cur_chip;
> +};
> +
> +static inline struct tegra_nand *to_tegra_nand(struct mtd_info *mtd)
> +{
> +	return container_of(mtd, struct tegra_nand, mtd);
> +}
> +
> +static struct nand_ecclayout tegra_nand_oob_16 = {
> +	.eccbytes = 4,
> +	.eccpos = { 3, 4, 5, 6 },
> +	.oobfree = {
> +		{ .offset = 8, . length = 8 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_64 = {
> +	.eccbytes = 36,
> +	.eccpos = {
> +		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
> +		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
> +		35, 36, 37, 38, 39

This list contains 37 entries, but .eccbytes is 36. Is this intended?

> +	},
> +	.oobfree = {
> +		{ .offset = 40, .length = 20 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_128 = {
> +	.eccbytes = 72,
> +	.eccpos = {
> +		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
> +		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
> +		35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
> +		51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
> +		67, 68, 69, 70, 71, 72, 73, 74, 75
> +	},
> +	.oobfree = {
> +		{ .offset = 76, .length = 52 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_224 = {
> +	.eccbytes = 144,
> +	.eccpos = {
> +		  3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +		 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
> +		 27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
> +		 39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,
> +		 51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,
> +		 63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
> +		 75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,
> +		 87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,
> +		 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
> +		111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
> +		123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
> +		135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
> +		147
> +	},
> +	.oobfree = {
> +		{ .offset = 148, .length = 76 }
> +	}
> +};
> +
> +static irqreturn_t tegra_nand_irq(int irq, void *data)
> +{
> +	struct tegra_nand *nand = data;
> +	irqreturn_t ret = IRQ_HANDLED;
> +	u32 isr, dma;
> +
> +	isr = readl(nand->regs + ISR);
> +	dma = readl(nand->regs + DMA_CTRL);
> +
> +	if (!isr && !(dma & DMA_CTRL_IS_DONE)) {
> +		ret = IRQ_NONE;
> +		goto out;

The out label doesn't do anything more than just return. Why not just
return IRQ_NONE here and return IRQ_HANDLED at the end? That saves the
local variable and helps readability...

Why is this needed anyway, is the IRQ shared with other peripherals?

In the L4T driver, there is a warning message about spurious interrupts.
Does this work around those interrupts?

> +	}
> +
> +	if (isr & ISR_CMD_DONE)
> +		complete(&nand->command_complete);
> +
> +	if (isr & ISR_UND)
> +		dev_dbg(nand->dev, "  FIFO underrun\n");

Two spaces?

> +
> +	if (isr & ISR_OVR)
> +		dev_dbg(nand->dev, "  FIFO overrun\n");
> +
> +	/* handle DMA interrupts */
> +	if (dma & DMA_CTRL_IS_DONE) {
> +		writel(dma, nand->regs + DMA_CTRL);
> +		complete(&nand->dma_complete);
> +	}
> +
> +	/* clear interrupts */
> +	writel(isr, nand->regs + ISR);
> +
> +out:
> +	return ret;
> +}
> +
> +static void tegra_nand_command(struct mtd_info *mtd, unsigned int command,
> +			       int column, int page_addr)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	u32 value;
> +
> +	switch (command) {
> +	case NAND_CMD_READOOB:
> +		column += mtd->writesize;
> +		/* fall-through */
> +
> +	case NAND_CMD_READ0:
> +		writel(NAND_CMD_READ0, nand->regs + CMD_1);
> +		writel(NAND_CMD_READSTART, nand->regs + CMD_2);
> +
> +		value = (page_addr << 16) | (column & 0xffff);
> +		writel(value, nand->regs + ADDR_1);
> +
> +		value = page_addr >> 16;
> +		writel(value, nand->regs + ADDR_2);
> +
> +		value = CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4) | CMD_SEC_CMD |
> +			CMD_RBSY_CHK | CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_SEQIN:
> +		writel(NAND_CMD_SEQIN, nand->regs + CMD_1);
> +
> +		value = (page_addr << 16) | (column & 0xffff);
> +		writel(value, nand->regs + ADDR_1);
> +
> +		value = page_addr >> 16;
> +		writel(value, nand->regs + ADDR_2);
> +
> +		value = CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4) |
> +			CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_PAGEPROG:
> +		writel(NAND_CMD_PAGEPROG, nand->regs + CMD_1);
> +
> +		value = CMD_CLE | CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_READID:
> +		writel(NAND_CMD_READID, nand->regs + CMD_1);
> +		writel(column & 0xff, nand->regs + ADDR_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_ERASE1:
> +		writel(NAND_CMD_ERASE1, nand->regs + CMD_1);
> +		writel(NAND_CMD_ERASE2, nand->regs + CMD_2);
> +		writel(page_addr, nand->regs + ADDR_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_ALE_SIZE(2) |
> +			CMD_SEC_CMD | CMD_RBSY_CHK | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_ERASE2:
> +		return;
> +
> +	case NAND_CMD_STATUS:
> +		writel(NAND_CMD_STATUS, nand->regs + CMD_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_PARAM:
> +		writel(NAND_CMD_PARAM, nand->regs + CMD_1);
> +		writel(column & 0xff, nand->regs + ADDR_1);
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_RESET:
> +		writel(NAND_CMD_RESET, nand->regs + CMD_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	default:
> +		dev_warn(nand->dev, "unsupported command: %x\n", command);
> +		return;
> +	}
> +
> +	wait_for_completion(&nand->command_complete);
> +}
> +
> +static void tegra_nand_select_chip(struct mtd_info *mtd, int chip)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +
> +	nand->cur_chip = chip;
> +}
> +
> +static uint8_t tegra_nand_read_byte(struct mtd_info *mtd)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	u32 value;
> +
> +	value = CMD_TRANS_SIZE(0) | CMD_CE(nand->cur_chip) |
> +		CMD_PIO | CMD_RX | CMD_A_VALID | CMD_GO;
> +
> +	writel(value, nand->regs + CMD);
> +	wait_for_completion(&nand->command_complete);
> +
> +	return readl(nand->regs + RESP) & 0xff;
> +}
> +
> +static void tegra_nand_read_buf(struct mtd_info *mtd, uint8_t *buffer,
> +				int length)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	size_t i;
> +
> +	for (i = 0; i < length; i += 4) {
> +		u32 value;
> +		size_t n = min_t(size_t, length - i, 4);
> +
> +		value = CMD_GO | CMD_PIO | CMD_RX | CMD_A_VALID |
> +			CMD_CE(nand->cur_chip) | CMD_TRANS_SIZE(n - 1);
> +
> +		writel(value, nand->regs + CMD);
> +		wait_for_completion(&nand->command_complete);
> +
> +		value = readl(nand->regs + RESP);
> +		memcpy(buffer + i, &value, n);
> +	}
> +}
> +
> +static void tegra_nand_write_buf(struct mtd_info *mtd, const uint8_t *buffer,
> +				 int length)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	size_t i;
> +
> +	for (i = 0; i < length; i += 4) {
> +		u32 value;
> +		size_t n = min_t(size_t, length - i, 4);
> +
> +		memcpy(&value, buffer + i, n);
> +		writel(value, nand->regs + RESP);
> +
> +		value = CMD_GO | CMD_PIO | CMD_TX | CMD_A_VALID |
> +			CMD_CE(nand->cur_chip) | CMD_TRANS_SIZE(n - 1);
> +
> +		writel(value, nand->regs + CMD);
> +		wait_for_completion(&nand->command_complete);
> +	}
> +}
> +
> +static int tegra_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
> +				uint8_t *buf, int oob_required, int page)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	u32 value;
> +
> +	writel(mtd->writesize - 1, nand->regs + DMA_CFG_A);
> +	writel(nand->data_dma, nand->regs + DATA_PTR);
> +
> +	if (oob_required) {
> +		writel(mtd->oobsize - 1, nand->regs + DMA_CFG_B);
> +		writel(nand->oob_dma, nand->regs + TAG_PTR);
> +	} else {
> +		writel(0, nand->regs + DMA_CFG_B);
> +		writel(0, nand->regs + TAG_PTR);
> +	}
> +
> +	value = DMA_CTRL_GO | DMA_CTRL_IN | DMA_CTRL_PERF_EN |
> +		DMA_CTRL_REUSE | DMA_CTRL_IE_DONE | DMA_CTRL_IS_DONE |
> +		DMA_CTRL_BURST_8 | DMA_CTRL_EN_A;
> +
> +	if (oob_required)
> +		value |= DMA_CTRL_EN_B;
> +
> +	writel(value, nand->regs + DMA_CTRL);
> +
> +	value = CMD_GO | CMD_RX | CMD_TRANS_SIZE(8) |
> +		CMD_A_VALID | CMD_CE(nand->cur_chip);
> +	if (oob_required)
> +		value |= CMD_B_VALID;
> +	writel(value, nand->regs + CMD);
> +
> +	wait_for_completion(&nand->command_complete);
> +	wait_for_completion(&nand->dma_complete);
> +
> +	if (oob_required)
> +		memcpy(chip->oob_poi, nand->oob_buf, mtd->oobsize);
> +	memcpy(buf, nand->data_buf, mtd->writesize);
> +
> +	value = readl(nand->regs + DEC_RESULT);
> +	if (value & DEC_RESULT_CORRFAIL) {
> +		value = readl(nand->regs + DEC_STATUS_BUF);
> +
> +		if (DEC_STATUS_BUF_FAIL_SEC_FLAG(value))
> +			return -1;
> +
> +		if (DEC_STATUS_BUF_CORR_SEC_FLAG(value))
> +			return DEC_STATUS_BUF_MAX_CORR_CNT(value);
> +	}
> +
> +	return 0;
> +}
> +
> +static int tegra_nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
> +				 const uint8_t *buf, int oob_required)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	unsigned long value;
> +	int ret = 0;
> +
> +	memcpy(nand->data_buf, buf, mtd->writesize);
> +
> +	writel(mtd->writesize - 1, nand->regs + DMA_CFG_A);
> +	writel(nand->data_dma, nand->regs + DATA_PTR);
> +
> +	writel(0, nand->regs + DMA_CFG_B);
> +	writel(0, nand->regs + TAG_PTR);
> +
> +	value = DMA_CTRL_GO | DMA_CTRL_OUT | DMA_CTRL_PERF_EN |
> +		DMA_CTRL_IE_DONE | DMA_CTRL_IS_DONE |
> +		DMA_CTRL_BURST_8 | DMA_CTRL_EN_A;
> +	writel(value, nand->regs + DMA_CTRL);
> +
> +	value = CMD_GO | CMD_TX | CMD_A_VALID | CMD_TRANS_SIZE(8) |
> +		CMD_CE(nand->cur_chip);
> +	writel(value, nand->regs + CMD);
> +
> +	wait_for_completion(&nand->command_complete);
> +	wait_for_completion(&nand->dma_complete);
> +
> +	return ret;

ret is never modified in this function, hence you can omit it.

> +}
> +
> +static void tegra_nand_setup_timing(struct tegra_nand *nand, int mode)
> +{
> +	unsigned long rate = clk_get_rate(nand->clk) / 1000000;
> +	unsigned long period = 1000000 / rate;

Hm, period of a clock in ns... Sounds like a common use case. I searched
for a macro/helper, but did not find anything. Well then.

> +	const struct nand_sdr_timings *timings;
> +	u32 val, reg = 0;
> +
> +	timings = onfi_async_timing_mode_to_sdr_timings(mode);
> +
> +	val = max3(timings->tAR_min, timings->tRR_min,
> +		   timings->tRC_min) / period;
> +	if (val > 2)
> +		val -= 2;

According to my TRM this is:
Generated timing = (n+3) * NAND_CLK_PERIOD ns.

Shouldn't this look like
	if (val >= 3)
		val -= 3;
then?

I see that all the calculations of the timings below are different than
in the TRM. I think we need to take the whole offset into account, or am
I missing something here?


> +	reg |= TIMING_TCR_TAR_TRR(val);
> +
> +	val = max(max(timings->tCS_min, timings->tCH_min),
> +		  max(timings->tALS_min, timings->tALH_min)) / period;
> +	if (val > 1)
> +		val -= 1;
> +	reg |= TIMING_TCS(val);

See mask error in macro definition.

> +
> +	val = max(timings->tRP_min, timings->tREA_max) + 6000;
> +	reg |= TIMING_TRP(val / 1000);
> +	reg |= TIMING_TRP_RESP(val / period);
> +
> +	reg |= TIMING_TWB(timings->tWB_max / period);
> +	reg |= TIMING_TWHR(timings->tWHR_min / period);
> +	reg |= TIMING_TWH(timings->tWH_min / 1000);
> +	reg |= TIMING_TWP(timings->tWP_min / 1000);
> +	reg |= TIMING_TRH(timings->tRHW_min / 1000);

Why 1000 for those three values? In my TRM, those values are in
NAND_CLK_PERIOD too.

> +
> +	writel(reg, nand->regs + TIMING_1);
> +
> +	val = timings->tADL_min / period;
> +	if (val > 2)
> +		val -= 2;
> +	reg = TIMING_TADL(val);
> +
> +	writel(reg, nand->regs + TIMING_2);
> +}
> +
> +static void tegra_nand_setup_chiptiming(struct tegra_nand *nand)
> +{
> +	struct nand_chip *chip = &nand->chip;
> +	int mode;
> +
> +	mode = onfi_get_async_timing_mode(chip);
> +	if (mode == ONFI_TIMING_MODE_UNKNOWN)
> +		mode = chip->onfi_timing_mode_default;
> +	else
> +		mode = fls(mode);
> +
> +	tegra_nand_setup_timing(nand, mode);
> +}
> +
> +static int tegra_nand_parse_dt(struct device_node *node,
> +			       struct tegra_nand *nand)
> +{
> +	enum of_gpio_flags flags;
> +
> +	nand->wp_gpio = of_get_named_gpio_flags(node, "nvidia,wp-gpios", 0,
> +						 &flags);

There is a non-flags variant; that should work fine for this case, I
guess.

> +	if (nand->wp_gpio < 0)
> +		nand->wp_gpio = 0;
> +
> +	nand->buswidth = of_get_nand_bus_width(node);
> +	if (nand->buswidth < 0)
> +		return nand->buswidth;
> +
> +	return 0;
> +}
> +
> +static const char * const part_probes[] = {
> +	"cmdlinepart", "ofpart", NULL };
> +
> +static int tegra_nand_probe(struct platform_device *pdev)
> +{
> +	struct tegra_nand *nand;
> +	struct nand_chip *chip;
> +	struct mtd_info *mtd;
> +	struct resource *res;
> +	unsigned long value;
> +	int err = 0;
> +
> +	nand = devm_kzalloc(&pdev->dev, sizeof(*nand), GFP_KERNEL);
> +	if (!nand)
> +		return -ENOMEM;
> +
> +	nand->dev = &pdev->dev;
> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	nand->regs = devm_ioremap_resource(&pdev->dev, res);
> +	if (IS_ERR(nand->regs))
> +		return PTR_ERR(nand->regs);
> +
> +	nand->irq  = platform_get_irq(pdev, 0);

Double space...

> +	err = devm_request_irq(&pdev->dev, nand->irq, tegra_nand_irq, 0,
> +			       dev_name(&pdev->dev), nand);
> +	if (err)
> +		return err;
> +
> +	nand->rst = devm_reset_control_get(&pdev->dev, "nand");
> +	if (IS_ERR(nand->rst))
> +		return PTR_ERR(nand->rst);
> +
> +	nand->clk = devm_clk_get(&pdev->dev, "nand");
> +	if (IS_ERR(nand->clk))
> +		return PTR_ERR(nand->clk);
> +
> +	err = tegra_nand_parse_dt(pdev->dev.of_node, nand);
> +	if (err)
> +		return err;
> +
> +	err = clk_prepare_enable(nand->clk);
> +	if (err)
> +		return err;
> +
> +	reset_control_assert(nand->rst);
> +	udelay(2);
> +	reset_control_deassert(nand->rst);
> +
> +	if (gpio_is_valid(nand->wp_gpio)) {
> +		err = devm_gpio_request_one(&pdev->dev, nand->wp_gpio,
> +				GPIOF_OUT_INIT_HIGH, "tegra-nand-wp");
> +		if (err)
> +			return err;
> +	}
> +
> +	value = HWSTATUS_RDSTATUS_MASK(1) | HWSTATUS_RDSTATUS_VALUE(0) |
> +		HWSTATUS_RBSY_MASK(NAND_STATUS_READY) |
> +		HWSTATUS_RBSY_VALUE(NAND_STATUS_READY);
> +	writel(NAND_CMD_STATUS, nand->regs + HWSTATUS_CMD);
> +	writel(value, nand->regs + HWSTATUS_MASK);
> +
> +	init_completion(&nand->command_complete);
> +	init_completion(&nand->dma_complete);
> +
> +	mtd = &nand->mtd;
> +	mtd->name = dev_name(&pdev->dev);
> +	mtd->owner = THIS_MODULE;
> +	mtd->priv = &nand->chip;
> +
> +	mtd->type = MTD_NANDFLASH;
> +	mtd->flags = MTD_CAP_NANDFLASH;
> +
> +	/* clear interrupts */
> +	value = readl(nand->regs + ISR);
> +	writel(value, nand->regs + ISR);
> +
> +	writel(DMA_CTRL_IS_DONE, nand->regs + DMA_CTRL);
> +
> +	/* enable interrupts */
> +	value = IER_UND | IER_OVR | IER_CMD_DONE | IER_ECC_ERR | IER_GIE;
> +	writel(value, nand->regs + IER);
> +
> +	chip = &nand->chip;
> +	chip->cmdfunc = tegra_nand_command;
> +	chip->select_chip = tegra_nand_select_chip;
> +	chip->read_byte = tegra_nand_read_byte;
> +	chip->read_buf = tegra_nand_read_buf;
> +	chip->write_buf = tegra_nand_write_buf;
> +
> +	tegra_nand_setup_timing(nand, 0);
> +
> +	err = nand_scan_ident(mtd, 1, NULL);
> +	if (err)
> +		return err;
> +
> +	nand->data_buf = dmam_alloc_coherent(&pdev->dev, mtd->writesize,
> +					    &nand->data_dma, GFP_KERNEL);
> +	if (!nand->data_buf)
> +		return -ENOMEM;
> +
> +	nand->oob_buf = dmam_alloc_coherent(&pdev->dev, mtd->oobsize,
> +					    &nand->oob_dma, GFP_KERNEL);
> +	if (!nand->oob_buf)
> +		return -ENOMEM;
> +
> +	chip->ecc.mode = NAND_ECC_HW;
> +	chip->ecc.size = 512;
> +	chip->ecc.bytes = mtd->oobsize;
> +	chip->ecc.read_page = tegra_nand_read_page;
> +	chip->ecc.write_page = tegra_nand_write_page;
> +
> +	value = CFG_HW_ECC | CFG_ECC_SEL | CFG_ERR_COR | CFG_PIPE_EN |
> +		CFG_TVAL_8 | CFG_SKIP_SPARE | CFG_SKIP_SPARE_SIZE_4;
> +
> +	switch (mtd->oobsize) {
> +	case 16:
> +		chip->ecc.layout = &tegra_nand_oob_16;
> +		chip->ecc.strength = 1;
> +		value |= CFG_TAG_BYTE_SIZE(4);
> +		break;
> +	case 64:
> +		chip->ecc.layout = &tegra_nand_oob_64;
> +		chip->ecc.strength = 8;
> +		value |= CFG_TAG_BYTE_SIZE(36);
> +		break;
> +	case 128:
> +		chip->ecc.layout = &tegra_nand_oob_128;
> +		chip->ecc.strength = 8;
> +		value |= CFG_TAG_BYTE_SIZE(72);
> +		break;
> +	case 224:
> +		chip->ecc.layout = &tegra_nand_oob_224;
> +		chip->ecc.strength = 8;
> +		value |= CFG_TAG_BYTE_SIZE(144);
> +		break;
> +	default:
> +		dev_err(&pdev->dev, "unhandled OOB size %d\n", mtd->oobsize);
> +		return -ENODEV;
> +	}
> +
> +	switch (mtd->writesize) {
> +	case 256:
> +		value |= CFG_PS_256;
> +		break;
> +	case 512:
> +		value |= CFG_PS_512;
> +		break;
> +	case 1024:
> +		value |= CFG_PS_1024;
> +		break;
> +	case 2048:
> +		value |= CFG_PS_2048;
> +		break;
> +	case 4096:
> +		value |= CFG_PS_4096;
> +		break;
> +	default:
> +		dev_err(&pdev->dev, "unhandled writesize %d\n", mtd->writesize);
> +		return -ENODEV;
> +	}
> +
> +	if (nand->buswidth == 16)
> +		value |= CFG_BUS_WIDTH_16;
> +
> +	writel(value, nand->regs + CFG);
> +
> +	tegra_nand_setup_chiptiming(nand);
> +
> +	err = nand_scan_tail(mtd);
> +	if (err)
> +		return err;
> +
> +	mtd_device_parse_register(mtd, NULL,
> +				  &(struct mtd_part_parser_data) {
> +					.of_node = pdev->dev.of_node,
> +				  },
> +				  NULL, 0);
> +
> +	platform_set_drvdata(pdev, nand);
> +
> +	return 0;
> +}
> +
> +static int tegra_nand_remove(struct platform_device *pdev)
> +{
> +	struct tegra_nand *nand = platform_get_drvdata(pdev);
> +
> +	nand_release(&nand->mtd);
> +
> +	clk_disable_unprepare(nand->clk);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id tegra_nand_of_match[] = {
> +	{ .compatible = "nvidia,tegra20-nand" },
> +	{ .compatible = "nvidia,tegra30-nand" },
> +	{ }
> +};
> +
> +static struct platform_driver tegra_nand_driver = {
> +	.driver = {
> +		.name = "tegra-nand",
> +		.of_match_table = tegra_nand_of_match,
> +	},
> +	.probe = tegra_nand_probe,
> +	.remove = tegra_nand_remove,
> +};
> +module_platform_driver(tegra_nand_driver);
> +
> +MODULE_DESCRIPTION("NVIDIA Tegra NAND driver");
> +MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de");
> +MODULE_AUTHOR("Lucas Stach <dev@lynxeye.de");
> +MODULE_LICENSE("GPL v2");
> +MODULE_DEVICE_TABLE(of, tegra_nand_of_match);

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ezequiel Garcia Jan. 6, 2015, 6:27 p.m. UTC | #2
On 01/04/2015 05:39 PM, Lucas Stach wrote:

Hi Lucas,

The driver looks mostly good. Just a few comments on my side.

> Add support for the NAND flash controller found on NVIDIA
> Tegra 2/3 SoCs. This is a largely reworked version of the driver
> started by Thierry.
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> ---
> I've tested this driver with the in-kernel mtd-tests and some
> realworld workloads on a Colibri T20 module.
> ---
>  .../bindings/mtd/nvidia,tegra20-nand.txt           |  30 +
>  MAINTAINERS                                        |   6 +
>  drivers/mtd/nand/Kconfig                           |   6 +
>  drivers/mtd/nand/Makefile                          |   1 +
>  drivers/mtd/nand/tegra_nand.c                      | 794 +++++++++++++++++++++
>  5 files changed, 837 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
>  create mode 100644 drivers/mtd/nand/tegra_nand.c
> 
> diff --git a/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> new file mode 100644
> index 0000000..088223c
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> @@ -0,0 +1,30 @@
> +NVIDIA Tegra NAND Flash controller
> +
> +Required properties:
> +- compatible: Must be one of:
> +  - "nvidia,tegra20-nand"
> +  - "nvidia,tegra30-nand"
> +- reg: MMIO address range
> +- interrupts: interrupt output of the NFC controller
> +- clocks: Must contain an entry for each entry in clock-names.
> +  See ../clocks/clock-bindings.txt for details.
> +- clock-names: Must include the following entries:
> +  - nand
> +- resets: Must contain an entry for each entry in reset-names.
> +  See ../reset/reset.txt for details.
> +- reset-names: Must include the following entries:
> +  - nand
> +
> +Optional properties:
> +- nvidia,wp-gpios: GPIO used to disable write protection of the flash
> +
> +  Example:
> +	nand@70008000 {
> +		compatible = "nvidia,tegra20-nand";
> +		reg = <0x70008000 0x100>;
> +		interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
> +		clocks = <&tegra_car TEGRA20_CLK_NDFLASH>;
> +		clock-names = "nand";
> +		resets = <&tegra_car 13>;
> +		reset-names = "nand";
> +	};
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ddb9ac8..972e31d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9459,6 +9459,12 @@ M:	Laxman Dewangan <ldewangan@nvidia.com>
>  S:	Supported
>  F:	drivers/input/keyboard/tegra-kbc.c
>  
> +TEGRA NAND DRIVER
> +M:	Lucas Stach <dev@lynxeye.de>
> +S:	Maintained
> +F:	Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> +F:	drivers/mtd/nand/tegra_nand.c
> +
>  TEGRA PWM DRIVER
>  M:	Thierry Reding <thierry.reding@gmail.com>
>  S:	Supported
> diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
> index 7d0150d..1eafd4e 100644
> --- a/drivers/mtd/nand/Kconfig
> +++ b/drivers/mtd/nand/Kconfig
> @@ -524,4 +524,10 @@ config MTD_NAND_SUNXI
>  	help
>  	  Enables support for NAND Flash chips on Allwinner SoCs.
>  
> +config MTD_NAND_TEGRA
> +	tristate "Support for NAND on NVIDIA Tegra"
> +	depends on ARCH_TEGRA || COMPILE_TEST
> +	help
> +	  Enables support for NAND flash on NVIDIA Tegra SoC based boards.
> +
>  endif # MTD_NAND
> diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
> index bd38f21..58399ce 100644
> --- a/drivers/mtd/nand/Makefile
> +++ b/drivers/mtd/nand/Makefile
> @@ -51,5 +51,6 @@ obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
>  obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
>  obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
>  obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
> +obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
>  
>  nand-objs := nand_base.o nand_bbt.o nand_timings.o
> diff --git a/drivers/mtd/nand/tegra_nand.c b/drivers/mtd/nand/tegra_nand.c
> new file mode 100644
> index 0000000..b919a6e
> --- /dev/null
> +++ b/drivers/mtd/nand/tegra_nand.c
> @@ -0,0 +1,794 @@
> +/*
> + * Copyright (C) 2014-2015 Lucas Stach <dev@lynxeye.de>
> + * Copyright (C) 2012 Avionic Design GmbH
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/completion.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/err.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/module.h>
> +#include <linux/mtd/nand.h>
> +#include <linux/mtd/partitions.h>
> +#include <linux/of_gpio.h>
> +#include <linux/of_mtd.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/reset.h>
> +
> +#define CMD				0x00
> +#define   CMD_GO			(1 << 31)
> +#define   CMD_CLE			(1 << 30)
> +#define   CMD_ALE			(1 << 29)
> +#define   CMD_PIO			(1 << 28)
> +#define   CMD_TX			(1 << 27)
> +#define   CMD_RX			(1 << 26)

How about using BIT() ?

> +#define   CMD_SEC_CMD			(1 << 25)
> +#define   CMD_AFT_DAT			(1 << 24)
> +#define   CMD_TRANS_SIZE(x)		(((x) & 0xf) << 20)
> +#define   CMD_A_VALID			(1 << 19)
> +#define   CMD_B_VALID			(1 << 18)
> +#define   CMD_RD_STATUS_CHK		(1 << 17)
> +#define   CMD_RBSY_CHK			(1 << 16)
> +#define   CMD_CE(x)			(1 << (8 + ((x) & 0x7)))
> +#define   CMD_CLE_SIZE(x)		(((x) & 0x3) << 4)
> +#define   CMD_ALE_SIZE(x)		(((x) & 0xf) << 0)
> +
> +#define STATUS				0x04
> +
> +#define ISR				0x08
> +#define   ISR_UND			(1 << 7)
> +#define   ISR_OVR			(1 << 6)
> +#define   ISR_CMD_DONE			(1 << 5)
> +#define   ISR_ECC_ERR			(1 << 4)
> +
> +#define IER				0x0c
> +#define   IER_ERR_TRIG_VAL(x)		(((x) & 0xf) << 16)
> +#define   IER_UND			(1 << 7)
> +#define   IER_OVR			(1 << 6)
> +#define   IER_CMD_DONE			(1 << 5)
> +#define   IER_ECC_ERR			(1 << 4)
> +#define   IER_GIE			(1 << 0)
> +
> +#define CFG				0x10
> +#define   CFG_HW_ECC			(1 << 31)
> +#define   CFG_ECC_SEL			(1 << 30)
> +#define   CFG_ERR_COR			(1 << 29)
> +#define   CFG_PIPE_EN			(1 << 28)
> +#define   CFG_TVAL_4			(0 << 24)
> +#define   CFG_TVAL_6			(1 << 24)
> +#define   CFG_TVAL_8			(2 << 24)
> +#define   CFG_SKIP_SPARE		(1 << 23)
> +#define   CFG_BUS_WIDTH_8		(0 << 21)
> +#define   CFG_BUS_WIDTH_16		(1 << 21)
> +#define   CFG_COM_BSY			(1 << 20)
> +#define   CFG_PS_256			(0 << 16)
> +#define   CFG_PS_512			(1 << 16)
> +#define   CFG_PS_1024			(2 << 16)
> +#define   CFG_PS_2048			(3 << 16)
> +#define   CFG_PS_4096			(4 << 16)
> +#define   CFG_SKIP_SPARE_SIZE_4		(0 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_8		(1 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_12	(2 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_16	(3 << 14)
> +#define   CFG_TAG_BYTE_SIZE(x)		((x) & 0xff)
> +
> +#define TIMING_1			0x14
> +#define   TIMING_TRP_RESP(x)		(((x) & 0xf) << 28)
> +#define   TIMING_TWB(x)			(((x) & 0xf) << 24)
> +#define   TIMING_TCR_TAR_TRR(x)		(((x) & 0xf) << 20)
> +#define   TIMING_TWHR(x)		(((x) & 0xf) << 16)
> +#define   TIMING_TCS(x)			(((x) & 0xc) << 14)
> +#define   TIMING_TWH(x)			(((x) & 0x3) << 12)
> +#define   TIMING_TWP(x)			(((x) & 0xf) <<  8)
> +#define   TIMING_TRH(x)			(((x) & 0xf) <<  4)
> +#define   TIMING_TRP(x)			(((x) & 0xf) <<  0)
> +
> +#define RESP				0x18
> +
> +#define TIMING_2			0x1c
> +#define   TIMING_TADL(x)		((x) & 0xf)
> +
> +#define CMD_1				0x20
> +#define CMD_2				0x24
> +#define ADDR_1				0x28
> +#define ADDR_2				0x2c
> +
> +#define DMA_CTRL			0x30
> +#define   DMA_CTRL_GO			(1 << 31)
> +#define   DMA_CTRL_IN			(0 << 30)
> +#define   DMA_CTRL_OUT			(1 << 30)
> +#define   DMA_CTRL_PERF_EN		(1 << 29)
> +#define   DMA_CTRL_IE_DONE		(1 << 28)
> +#define   DMA_CTRL_REUSE		(1 << 27)
> +#define   DMA_CTRL_BURST_1		(2 << 24)
> +#define   DMA_CTRL_BURST_4		(3 << 24)
> +#define   DMA_CTRL_BURST_8		(4 << 24)
> +#define   DMA_CTRL_BURST_16		(5 << 24)
> +#define   DMA_CTRL_IS_DONE		(1 << 20)
> +#define   DMA_CTRL_EN_A			(1 <<  2)
> +#define   DMA_CTRL_EN_B			(1 <<  1)
> +
> +#define DMA_CFG_A			0x34
> +#define DMA_CFG_B			0x38
> +
> +#define FIFO_CTRL			0x3c
> +#define   FIFO_CTRL_CLR_ALL		(1 << 3)
> +
> +#define DATA_PTR			0x40
> +#define TAG_PTR				0x44
> +#define ECC_PTR				0x48
> +
> +#define HWSTATUS_CMD			0x50
> +#define HWSTATUS_MASK			0x54
> +#define   HWSTATUS_RDSTATUS_MASK(x)	(((x) & 0xff) << 24)
> +#define   HWSTATUS_RDSTATUS_VALUE(x)	(((x) & 0xff) << 16)
> +#define   HWSTATUS_RBSY_MASK(x)		(((x) & 0xff) << 8)
> +#define   HWSTATUS_RBSY_VALUE(x)	(((x) & 0xff) << 0)
> +
> +#define DEC_RESULT			0xd0
> +#define   DEC_RESULT_CORRFAIL		(1 << 8)
> +
> +#define DEC_STATUS_BUF			0xd4
> +#define   DEC_STATUS_BUF_FAIL_SEC_FLAG(x)	((x) & (0xff << 24))
> +#define   DEC_STATUS_BUF_CORR_SEC_FLAG(x)	((x) & (0xff << 16))
> +#define   DEC_STATUS_BUF_MAX_CORR_CNT(x)	(((x) & 0xf00) >> 8)
> +
> +struct tegra_nand {
> +	void __iomem *regs;
> +	int irq;

Seems like you don't need to store irq.

> +	struct clk *clk;
> +	struct reset_control *rst;
> +	int wp_gpio;
> +	int buswidth;

And also you don't seem to need either wp_gpio or buswidth stored
in the struct. You only use them at probe time.

> +
> +	struct nand_chip chip;
> +	struct mtd_info mtd;
> +	struct device *dev;
> +
> +	struct completion command_complete;
> +	struct completion dma_complete;
> +
> +	dma_addr_t data_dma;
> +	void *data_buf;
> +	dma_addr_t oob_dma;
> +	void *oob_buf;
> +
> +	int cur_chip;
> +};
> +
> +static inline struct tegra_nand *to_tegra_nand(struct mtd_info *mtd)
> +{
> +	return container_of(mtd, struct tegra_nand, mtd);
> +}
> +
> +static struct nand_ecclayout tegra_nand_oob_16 = {
> +	.eccbytes = 4,
> +	.eccpos = { 3, 4, 5, 6 },
> +	.oobfree = {
> +		{ .offset = 8, . length = 8 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_64 = {
> +	.eccbytes = 36,
> +	.eccpos = {
> +		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
> +		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
> +		35, 36, 37, 38, 39
> +	},
> +	.oobfree = {
> +		{ .offset = 40, .length = 20 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_128 = {
> +	.eccbytes = 72,
> +	.eccpos = {
> +		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
> +		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
> +		35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
> +		51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
> +		67, 68, 69, 70, 71, 72, 73, 74, 75
> +	},
> +	.oobfree = {
> +		{ .offset = 76, .length = 52 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_224 = {
> +	.eccbytes = 144,
> +	.eccpos = {
> +		  3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +		 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
> +		 27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
> +		 39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,
> +		 51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,
> +		 63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
> +		 75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,
> +		 87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,
> +		 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
> +		111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
> +		123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
> +		135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
> +		147
> +	},
> +	.oobfree = {
> +		{ .offset = 148, .length = 76 }
> +	}
> +};
> +
> +static irqreturn_t tegra_nand_irq(int irq, void *data)
> +{
> +	struct tegra_nand *nand = data;
> +	irqreturn_t ret = IRQ_HANDLED;
> +	u32 isr, dma;
> +
> +	isr = readl(nand->regs + ISR);
> +	dma = readl(nand->regs + DMA_CTRL);
> +
> +	if (!isr && !(dma & DMA_CTRL_IS_DONE)) {
> +		ret = IRQ_NONE;
> +		goto out;
> +	}
> +
> +	if (isr & ISR_CMD_DONE)
> +		complete(&nand->command_complete);
> +
> +	if (isr & ISR_UND)
> +		dev_dbg(nand->dev, "  FIFO underrun\n");
> +
> +	if (isr & ISR_OVR)
> +		dev_dbg(nand->dev, "  FIFO overrun\n");
> +
> +	/* handle DMA interrupts */
> +	if (dma & DMA_CTRL_IS_DONE) {
> +		writel(dma, nand->regs + DMA_CTRL);
> +		complete(&nand->dma_complete);
> +	}
> +
> +	/* clear interrupts */
> +	writel(isr, nand->regs + ISR);
> +
> +out:
> +	return ret;
> +}
> +
> +static void tegra_nand_command(struct mtd_info *mtd, unsigned int command,
> +			       int column, int page_addr)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	u32 value;
> +
> +	switch (command) {
> +	case NAND_CMD_READOOB:
> +		column += mtd->writesize;
> +		/* fall-through */
> +
> +	case NAND_CMD_READ0:
> +		writel(NAND_CMD_READ0, nand->regs + CMD_1);
> +		writel(NAND_CMD_READSTART, nand->regs + CMD_2);
> +
> +		value = (page_addr << 16) | (column & 0xffff);
> +		writel(value, nand->regs + ADDR_1);
> +
> +		value = page_addr >> 16;
> +		writel(value, nand->regs + ADDR_2);
> +
> +		value = CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4) | CMD_SEC_CMD |
> +			CMD_RBSY_CHK | CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_SEQIN:
> +		writel(NAND_CMD_SEQIN, nand->regs + CMD_1);
> +
> +		value = (page_addr << 16) | (column & 0xffff);
> +		writel(value, nand->regs + ADDR_1);
> +
> +		value = page_addr >> 16;
> +		writel(value, nand->regs + ADDR_2);
> +
> +		value = CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4) |
> +			CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_PAGEPROG:
> +		writel(NAND_CMD_PAGEPROG, nand->regs + CMD_1);
> +
> +		value = CMD_CLE | CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_READID:
> +		writel(NAND_CMD_READID, nand->regs + CMD_1);
> +		writel(column & 0xff, nand->regs + ADDR_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_ERASE1:
> +		writel(NAND_CMD_ERASE1, nand->regs + CMD_1);
> +		writel(NAND_CMD_ERASE2, nand->regs + CMD_2);
> +		writel(page_addr, nand->regs + ADDR_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_ALE_SIZE(2) |
> +			CMD_SEC_CMD | CMD_RBSY_CHK | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_ERASE2:
> +		return;
> +
> +	case NAND_CMD_STATUS:
> +		writel(NAND_CMD_STATUS, nand->regs + CMD_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_PARAM:
> +		writel(NAND_CMD_PARAM, nand->regs + CMD_1);
> +		writel(column & 0xff, nand->regs + ADDR_1);
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_RESET:
> +		writel(NAND_CMD_RESET, nand->regs + CMD_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	default:
> +		dev_warn(nand->dev, "unsupported command: %x\n", command);
> +		return;
> +	}
> +
> +	wait_for_completion(&nand->command_complete);
> +}
> +
> +static void tegra_nand_select_chip(struct mtd_info *mtd, int chip)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +
> +	nand->cur_chip = chip;
> +}
> +
> +static uint8_t tegra_nand_read_byte(struct mtd_info *mtd)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	u32 value;
> +
> +	value = CMD_TRANS_SIZE(0) | CMD_CE(nand->cur_chip) |
> +		CMD_PIO | CMD_RX | CMD_A_VALID | CMD_GO;
> +
> +	writel(value, nand->regs + CMD);
> +	wait_for_completion(&nand->command_complete);
> +
> +	return readl(nand->regs + RESP) & 0xff;
> +}
> +
> +static void tegra_nand_read_buf(struct mtd_info *mtd, uint8_t *buffer,
> +				int length)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	size_t i;
> +
> +	for (i = 0; i < length; i += 4) {
> +		u32 value;
> +		size_t n = min_t(size_t, length - i, 4);
> +
> +		value = CMD_GO | CMD_PIO | CMD_RX | CMD_A_VALID |
> +			CMD_CE(nand->cur_chip) | CMD_TRANS_SIZE(n - 1);
> +
> +		writel(value, nand->regs + CMD);
> +		wait_for_completion(&nand->command_complete);
> +
> +		value = readl(nand->regs + RESP);
> +		memcpy(buffer + i, &value, n);
> +	}
> +}
> +
> +static void tegra_nand_write_buf(struct mtd_info *mtd, const uint8_t *buffer,
> +				 int length)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	size_t i;
> +
> +	for (i = 0; i < length; i += 4) {
> +		u32 value;
> +		size_t n = min_t(size_t, length - i, 4);
> +
> +		memcpy(&value, buffer + i, n);
> +		writel(value, nand->regs + RESP);
> +
> +		value = CMD_GO | CMD_PIO | CMD_TX | CMD_A_VALID |
> +			CMD_CE(nand->cur_chip) | CMD_TRANS_SIZE(n - 1);
> +
> +		writel(value, nand->regs + CMD);
> +		wait_for_completion(&nand->command_complete);
> +	}
> +}
> +
> +static int tegra_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
> +				uint8_t *buf, int oob_required, int page)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	u32 value;
> +
> +	writel(mtd->writesize - 1, nand->regs + DMA_CFG_A);
> +	writel(nand->data_dma, nand->regs + DATA_PTR);
> +
> +	if (oob_required) {
> +		writel(mtd->oobsize - 1, nand->regs + DMA_CFG_B);
> +		writel(nand->oob_dma, nand->regs + TAG_PTR);
> +	} else {
> +		writel(0, nand->regs + DMA_CFG_B);
> +		writel(0, nand->regs + TAG_PTR);
> +	}
> +
> +	value = DMA_CTRL_GO | DMA_CTRL_IN | DMA_CTRL_PERF_EN |
> +		DMA_CTRL_REUSE | DMA_CTRL_IE_DONE | DMA_CTRL_IS_DONE |
> +		DMA_CTRL_BURST_8 | DMA_CTRL_EN_A;
> +
> +	if (oob_required)
> +		value |= DMA_CTRL_EN_B;
> +
> +	writel(value, nand->regs + DMA_CTRL);
> +
> +	value = CMD_GO | CMD_RX | CMD_TRANS_SIZE(8) |
> +		CMD_A_VALID | CMD_CE(nand->cur_chip);
> +	if (oob_required)
> +		value |= CMD_B_VALID;
> +	writel(value, nand->regs + CMD);
> +
> +	wait_for_completion(&nand->command_complete);
> +	wait_for_completion(&nand->dma_complete);
> +
> +	if (oob_required)
> +		memcpy(chip->oob_poi, nand->oob_buf, mtd->oobsize);
> +	memcpy(buf, nand->data_buf, mtd->writesize);
> +
> +	value = readl(nand->regs + DEC_RESULT);
> +	if (value & DEC_RESULT_CORRFAIL) {
> +		value = readl(nand->regs + DEC_STATUS_BUF);
> +
> +		if (DEC_STATUS_BUF_FAIL_SEC_FLAG(value))
> +			return -1;
> +
> +		if (DEC_STATUS_BUF_CORR_SEC_FLAG(value))
> +			return DEC_STATUS_BUF_MAX_CORR_CNT(value);
> +	}
> +
> +	return 0;
> +}
> +
> +static int tegra_nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
> +				 const uint8_t *buf, int oob_required)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	unsigned long value;
> +	int ret = 0;
> +
> +	memcpy(nand->data_buf, buf, mtd->writesize);
> +
> +	writel(mtd->writesize - 1, nand->regs + DMA_CFG_A);
> +	writel(nand->data_dma, nand->regs + DATA_PTR);
> +
> +	writel(0, nand->regs + DMA_CFG_B);
> +	writel(0, nand->regs + TAG_PTR);
> +
> +	value = DMA_CTRL_GO | DMA_CTRL_OUT | DMA_CTRL_PERF_EN |
> +		DMA_CTRL_IE_DONE | DMA_CTRL_IS_DONE |
> +		DMA_CTRL_BURST_8 | DMA_CTRL_EN_A;
> +	writel(value, nand->regs + DMA_CTRL);
> +
> +	value = CMD_GO | CMD_TX | CMD_A_VALID | CMD_TRANS_SIZE(8) |
> +		CMD_CE(nand->cur_chip);
> +	writel(value, nand->regs + CMD);
> +
> +	wait_for_completion(&nand->command_complete);
> +	wait_for_completion(&nand->dma_complete);
> +
> +	return ret;
> +}
> +
> +static void tegra_nand_setup_timing(struct tegra_nand *nand, int mode)
> +{
> +	unsigned long rate = clk_get_rate(nand->clk) / 1000000;
> +	unsigned long period = 1000000 / rate;
> +	const struct nand_sdr_timings *timings;
> +	u32 val, reg = 0;
> +
> +	timings = onfi_async_timing_mode_to_sdr_timings(mode);
> +
> +	val = max3(timings->tAR_min, timings->tRR_min,
> +		   timings->tRC_min) / period;
> +	if (val > 2)
> +		val -= 2;
> +	reg |= TIMING_TCR_TAR_TRR(val);
> +
> +	val = max(max(timings->tCS_min, timings->tCH_min),
> +		  max(timings->tALS_min, timings->tALH_min)) / period;
> +	if (val > 1)
> +		val -= 1;
> +	reg |= TIMING_TCS(val);
> +
> +	val = max(timings->tRP_min, timings->tREA_max) + 6000;
> +	reg |= TIMING_TRP(val / 1000);
> +	reg |= TIMING_TRP_RESP(val / period);
> +
> +	reg |= TIMING_TWB(timings->tWB_max / period);
> +	reg |= TIMING_TWHR(timings->tWHR_min / period);
> +	reg |= TIMING_TWH(timings->tWH_min / 1000);
> +	reg |= TIMING_TWP(timings->tWP_min / 1000);
> +	reg |= TIMING_TRH(timings->tRHW_min / 1000);
> +
> +	writel(reg, nand->regs + TIMING_1);
> +
> +	val = timings->tADL_min / period;
> +	if (val > 2)
> +		val -= 2;
> +	reg = TIMING_TADL(val);
> +
> +	writel(reg, nand->regs + TIMING_2);
> +}
> +
> +static void tegra_nand_setup_chiptiming(struct tegra_nand *nand)
> +{
> +	struct nand_chip *chip = &nand->chip;
> +	int mode;
> +
> +	mode = onfi_get_async_timing_mode(chip);
> +	if (mode == ONFI_TIMING_MODE_UNKNOWN)
> +		mode = chip->onfi_timing_mode_default;
> +	else
> +		mode = fls(mode);
> +
> +	tegra_nand_setup_timing(nand, mode);
> +}
> +
> +static int tegra_nand_parse_dt(struct device_node *node,
> +			       struct tegra_nand *nand)
> +{
> +	enum of_gpio_flags flags;
> +
> +	nand->wp_gpio = of_get_named_gpio_flags(node, "nvidia,wp-gpios", 0,
> +						 &flags);
> +	if (nand->wp_gpio < 0)
> +		nand->wp_gpio = 0;
> +
> +	nand->buswidth = of_get_nand_bus_width(node);
> +	if (nand->buswidth < 0)
> +		return nand->buswidth;
> +

Shouldn't you set the NAND_BUSWIDTH_16 flag in nand_chip.options
before calling nand_scan_ident()?

Also, if you just call of_get_nand_bus_width() before nand_scan_ident(),
you can drop the nand->buswidth field.

Same goes for wp_gpio.

> +	return 0;
> +}
> +
> +static const char * const part_probes[] = {
> +	"cmdlinepart", "ofpart", NULL };
> +
> +static int tegra_nand_probe(struct platform_device *pdev)
> +{
> +	struct tegra_nand *nand;
> +	struct nand_chip *chip;
> +	struct mtd_info *mtd;
> +	struct resource *res;
> +	unsigned long value;
> +	int err = 0;
> +
> +	nand = devm_kzalloc(&pdev->dev, sizeof(*nand), GFP_KERNEL);
> +	if (!nand)
> +		return -ENOMEM;
> +
> +	nand->dev = &pdev->dev;
> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	nand->regs = devm_ioremap_resource(&pdev->dev, res);
> +	if (IS_ERR(nand->regs))
> +		return PTR_ERR(nand->regs);
> +
> +	nand->irq  = platform_get_irq(pdev, 0);
> +	err = devm_request_irq(&pdev->dev, nand->irq, tegra_nand_irq, 0,
> +			       dev_name(&pdev->dev), nand);
> +	if (err)
> +		return err;
> +
> +	nand->rst = devm_reset_control_get(&pdev->dev, "nand");
> +	if (IS_ERR(nand->rst))
> +		return PTR_ERR(nand->rst);
> +
> +	nand->clk = devm_clk_get(&pdev->dev, "nand");
> +	if (IS_ERR(nand->clk))
> +		return PTR_ERR(nand->clk);
> +
> +	err = tegra_nand_parse_dt(pdev->dev.of_node, nand);
> +	if (err)
> +		return err;
> +
> +	err = clk_prepare_enable(nand->clk);
> +	if (err)
> +		return err;
> +
> +	reset_control_assert(nand->rst);
> +	udelay(2);
> +	reset_control_deassert(nand->rst);
> +
> +	if (gpio_is_valid(nand->wp_gpio)) {
> +		err = devm_gpio_request_one(&pdev->dev, nand->wp_gpio,
> +				GPIOF_OUT_INIT_HIGH, "tegra-nand-wp");
> +		if (err)
> +			return err;
> +	}
> +
> +	value = HWSTATUS_RDSTATUS_MASK(1) | HWSTATUS_RDSTATUS_VALUE(0) |
> +		HWSTATUS_RBSY_MASK(NAND_STATUS_READY) |
> +		HWSTATUS_RBSY_VALUE(NAND_STATUS_READY);
> +	writel(NAND_CMD_STATUS, nand->regs + HWSTATUS_CMD);
> +	writel(value, nand->regs + HWSTATUS_MASK);
> +
> +	init_completion(&nand->command_complete);
> +	init_completion(&nand->dma_complete);
> +
> +	mtd = &nand->mtd;
> +	mtd->name = dev_name(&pdev->dev);
> +	mtd->owner = THIS_MODULE;
> +	mtd->priv = &nand->chip;
> +
> +	mtd->type = MTD_NANDFLASH;
> +	mtd->flags = MTD_CAP_NANDFLASH;
> +
> +	/* clear interrupts */
> +	value = readl(nand->regs + ISR);
> +	writel(value, nand->regs + ISR);
> +
> +	writel(DMA_CTRL_IS_DONE, nand->regs + DMA_CTRL);
> +
> +	/* enable interrupts */
> +	value = IER_UND | IER_OVR | IER_CMD_DONE | IER_ECC_ERR | IER_GIE;
> +	writel(value, nand->regs + IER);
> +
> +	chip = &nand->chip;
> +	chip->cmdfunc = tegra_nand_command;
> +	chip->select_chip = tegra_nand_select_chip;
> +	chip->read_byte = tegra_nand_read_byte;
> +	chip->read_buf = tegra_nand_read_buf;
> +	chip->write_buf = tegra_nand_write_buf;
> +
> +	tegra_nand_setup_timing(nand, 0);
> +
> +	err = nand_scan_ident(mtd, 1, NULL);
> +	if (err)
> +		return err;
> +
> +	nand->data_buf = dmam_alloc_coherent(&pdev->dev, mtd->writesize,
> +					    &nand->data_dma, GFP_KERNEL);
> +	if (!nand->data_buf)
> +		return -ENOMEM;
> +
> +	nand->oob_buf = dmam_alloc_coherent(&pdev->dev, mtd->oobsize,
> +					    &nand->oob_dma, GFP_KERNEL);
> +	if (!nand->oob_buf)
> +		return -ENOMEM;
> +
> +	chip->ecc.mode = NAND_ECC_HW;
> +	chip->ecc.size = 512;
> +	chip->ecc.bytes = mtd->oobsize;
> +	chip->ecc.read_page = tegra_nand_read_page;
> +	chip->ecc.write_page = tegra_nand_write_page;
> +
> +	value = CFG_HW_ECC | CFG_ECC_SEL | CFG_ERR_COR | CFG_PIPE_EN |
> +		CFG_TVAL_8 | CFG_SKIP_SPARE | CFG_SKIP_SPARE_SIZE_4;
> +
> +	switch (mtd->oobsize) {
> +	case 16:
> +		chip->ecc.layout = &tegra_nand_oob_16;
> +		chip->ecc.strength = 1;
> +		value |= CFG_TAG_BYTE_SIZE(4);
> +		break;
> +	case 64:
> +		chip->ecc.layout = &tegra_nand_oob_64;
> +		chip->ecc.strength = 8;
> +		value |= CFG_TAG_BYTE_SIZE(36);
> +		break;
> +	case 128:
> +		chip->ecc.layout = &tegra_nand_oob_128;
> +		chip->ecc.strength = 8;
> +		value |= CFG_TAG_BYTE_SIZE(72);
> +		break;
> +	case 224:
> +		chip->ecc.layout = &tegra_nand_oob_224;
> +		chip->ecc.strength = 8;
> +		value |= CFG_TAG_BYTE_SIZE(144);
> +		break;
> +	default:
> +		dev_err(&pdev->dev, "unhandled OOB size %d\n", mtd->oobsize);
> +		return -ENODEV;
> +	}
> +
> +	switch (mtd->writesize) {
> +	case 256:
> +		value |= CFG_PS_256;
> +		break;
> +	case 512:
> +		value |= CFG_PS_512;
> +		break;
> +	case 1024:
> +		value |= CFG_PS_1024;
> +		break;
> +	case 2048:
> +		value |= CFG_PS_2048;
> +		break;
> +	case 4096:

Nit: we have macros for these values (SZ_256, ..., SZ_4K).

> +		value |= CFG_PS_4096;
> +		break;
> +	default:
> +		dev_err(&pdev->dev, "unhandled writesize %d\n", mtd->writesize);
> +		return -ENODEV;
> +	}
> +
> +	if (nand->buswidth == 16)
> +		value |= CFG_BUS_WIDTH_16;
> +
> +	writel(value, nand->regs + CFG);
> +
> +	tegra_nand_setup_chiptiming(nand);
> +
> +	err = nand_scan_tail(mtd);
> +	if (err)
> +		return err;
> +
> +	mtd_device_parse_register(mtd, NULL,
> +				  &(struct mtd_part_parser_data) {
> +					.of_node = pdev->dev.of_node,
> +				  },
> +				  NULL, 0);
> +
> +	platform_set_drvdata(pdev, nand);
> +
> +	return 0;
> +}
> +
> +static int tegra_nand_remove(struct platform_device *pdev)
> +{
> +	struct tegra_nand *nand = platform_get_drvdata(pdev);
> +
> +	nand_release(&nand->mtd);
> +
> +	clk_disable_unprepare(nand->clk);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id tegra_nand_of_match[] = {
> +	{ .compatible = "nvidia,tegra20-nand" },
> +	{ .compatible = "nvidia,tegra30-nand" },

AFAIK, having two compatible strings, but making no distinction between
them is typically frowned upon by devicetree maintainers.

Is the controller any different in tegra20 and tegra30?

If you are not sure about the controllers being different, you can
try the following approach. The devicetree is written like this:

nand@foo {
   compatible = "nvidia,tegra20-nand", "nvidia,tegra-nand";
};

So you only deal with "nvidia,tegra-nand" in the driver, yet the
devicetree files are prepared to deal with a difference.

> +	{ }
> +};
> +
> +static struct platform_driver tegra_nand_driver = {
> +	.driver = {
> +		.name = "tegra-nand",
> +		.of_match_table = tegra_nand_of_match,
> +	},
> +	.probe = tegra_nand_probe,
> +	.remove = tegra_nand_remove,
> +};
> +module_platform_driver(tegra_nand_driver);
> +
> +MODULE_DESCRIPTION("NVIDIA Tegra NAND driver");
> +MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de");
> +MODULE_AUTHOR("Lucas Stach <dev@lynxeye.de");
> +MODULE_LICENSE("GPL v2");
> +MODULE_DEVICE_TABLE(of, tegra_nand_of_match);
> 

Regards,
Lucas Stach Jan. 7, 2015, 12:17 a.m. UTC | #3
Am Dienstag, den 06.01.2015, 00:41 +0100 schrieb Stefan Agner:
> Hi Lucas,
> 
> Thanks for picking that up!
> 
> I did some short benchmarks on Colibri T20 V1.2, L4T. Write/read speeds
> I measured on the YAFFS2 based file system:
> 
> # dd if=/dev/zero of=test bs=50M count=1 conv=fdatasync
> 1+0 records in
> 1+0 records out
> 52428800 bytes (52 MB) copied, 9.88293 s, 5.3 MB/s
> 
> echo 3 > /proc/sys/vm/drop_caches
> # dd if=test of=/dev/zero bs=50M count=1
> 1+0 records in
> 1+0 records out
> 52428800 bytes (52 MB) copied, 5.97056 s, 8.8 MB/s
> 
Thanks, this puts things into perspective.

> So your values look quite realistic then!
> 
> Some comments below...
> 
> On 2015-01-04 21:39, Lucas Stach wrote:
> > Add support for the NAND flash controller found on NVIDIA
> > Tegra 2/3 SoCs. This is a largely reworked version of the driver
> > started by Thierry.
> > 
> > Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> > Signed-off-by: Lucas Stach <dev@lynxeye.de>
> > ---
> > I've tested this driver with the in-kernel mtd-tests and some
> > realworld workloads on a Colibri T20 module.
> > ---
> >  .../bindings/mtd/nvidia,tegra20-nand.txt           |  30 +
> >  MAINTAINERS                                        |   6 +
> >  drivers/mtd/nand/Kconfig                           |   6 +
> >  drivers/mtd/nand/Makefile                          |   1 +
> >  drivers/mtd/nand/tegra_nand.c                      | 794 +++++++++++++++++++++
> >  5 files changed, 837 insertions(+)
> >  create mode 100644
> > Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> >  create mode 100644 drivers/mtd/nand/tegra_nand.c
> > 

[...]

> > +static irqreturn_t tegra_nand_irq(int irq, void *data)
> > +{
> > +	struct tegra_nand *nand = data;
> > +	irqreturn_t ret = IRQ_HANDLED;
> > +	u32 isr, dma;
> > +
> > +	isr = readl(nand->regs + ISR);
> > +	dma = readl(nand->regs + DMA_CTRL);
> > +
> > +	if (!isr && !(dma & DMA_CTRL_IS_DONE)) {
> > +		ret = IRQ_NONE;
> > +		goto out;
> 
> The out label doesn't do anything more than just return. Why not just
> return IRQ_NONE here and return IRQ_HANDLED at the end, saves the local
> variable and helps readability...
> 
Yes, this function looked a bit more complicated when I started to clean
it up, so not removing the out label was just an oversight.

> Why is this needed anyway, is the IRQ shared with other peripherals?
> 
> In the L4T driver, there is a warning message about spurious interrupts;
> does this work around those interrupts?
> 
I haven't seen any spurious IRQs during my testing. So not doing
anything special should be okay I think.

[...]

> > +}
> > +
> > +static void tegra_nand_setup_timing(struct tegra_nand *nand, int mode)
> > +{
> > +	unsigned long rate = clk_get_rate(nand->clk) / 1000000;
> > +	unsigned long period = 1000000 / rate;
> 
> Hm, period of a clock in ns... Sounds like a common use case. I searched
> for a macro/helper, but did not find anything. Well then.
> 
> > +	const struct nand_sdr_timings *timings;
> > +	u32 val, reg = 0;
> > +
> > +	timings = onfi_async_timing_mode_to_sdr_timings(mode);
> > +
> > +	val = max3(timings->tAR_min, timings->tRR_min,
> > +		   timings->tRC_min) / period;
> > +	if (val > 2)
> > +		val -= 2;
> 
> According to my TRM this is:
> Generated timing = (n+3) * NAND_CLK_PERIOD ns.
> 
> Shouldn't this look like
> 	if (val >= 3)
> 		val -= 3;
> then?
> 
> I see that all the calculations of the timings below are different than
> in the TRM. I think we need to take the whole offset into account, or am
> I missing something here?
> 
> 
> > +	reg |= TIMING_TCR_TAR_TRR(val);
> > +
> > +	val = max(max(timings->tCS_min, timings->tCH_min),
> > +		  max(timings->tALS_min, timings->tALH_min)) / period;
> > +	if (val > 1)
> > +		val -= 1;
> > +	reg |= TIMING_TCS(val);
> 
> See mask error in macro definition.
> 
> > +
> > +	val = max(timings->tRP_min, timings->tREA_max) + 6000;
> > +	reg |= TIMING_TRP(val / 1000);
> > +	reg |= TIMING_TRP_RESP(val / period);
> > +
> > +	reg |= TIMING_TWB(timings->tWB_max / period);
> > +	reg |= TIMING_TWHR(timings->tWHR_min / period);
> > +	reg |= TIMING_TWH(timings->tWH_min / 1000);
> > +	reg |= TIMING_TWP(timings->tWP_min / 1000);
> > +	reg |= TIMING_TRH(timings->tRHW_min / 1000);
> 
> Why 1000 for those three values? In my TRM, those values are in
> NAND_CLK_PERIOD too.
> 

Thanks for noticing. I wrote that part half a year ago and don't remember
the details anymore. I will look this up again (and fix it if necessary)
and get back to you.

Regards,
Lucas

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lucas Stach Jan. 7, 2015, 12:24 a.m. UTC | #4
Am Dienstag, den 06.01.2015, 15:27 -0300 schrieb Ezequiel Garcia:
> On 01/04/2015 05:39 PM, Lucas Stach wrote:
> 
> Hi Lucas,
> 
> The driver looks mostly good. Just a few comments on my side.
> 
> > Add support for the NAND flash controller found on NVIDIA
> > Tegra 2/3 SoCs. This is a largely reworked version of the driver
> > started by Thierry.
> > 
> > Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> > Signed-off-by: Lucas Stach <dev@lynxeye.de>
> > ---
> > I've tested this driver with the in-kernel mtd-tests and some
> > realworld workloads on a Colibri T20 module.
> > ---
> >  .../bindings/mtd/nvidia,tegra20-nand.txt           |  30 +
> >  MAINTAINERS                                        |   6 +
> >  drivers/mtd/nand/Kconfig                           |   6 +
> >  drivers/mtd/nand/Makefile                          |   1 +
> >  drivers/mtd/nand/tegra_nand.c                      | 794 +++++++++++++++++++++
> >  5 files changed, 837 insertions(+)
> >  create mode 100644 Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> >  create mode 100644 drivers/mtd/nand/tegra_nand.c
> > 
> > diff --git a/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> > new file mode 100644
> > index 0000000..088223c
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> > @@ -0,0 +1,30 @@
> > +NVIDIA Tegra NAND Flash controller
> > +
> > +Required properties:
> > +- compatible: Must be one of:
> > +  - "nvidia,tegra20-nand"
> > +  - "nvidia,tegra30-nand"
> > +- reg: MMIO address range
> > +- interrupts: interrupt output of the NFC controller
> > +- clocks: Must contain an entry for each entry in clock-names.
> > +  See ../clocks/clock-bindings.txt for details.
> > +- clock-names: Must include the following entries:
> > +  - nand
> > +- resets: Must contain an entry for each entry in reset-names.
> > +  See ../reset/reset.txt for details.
> > +- reset-names: Must include the following entries:
> > +  - nand
> > +
> > +Optional properties:
> > +- nvidia,wp-gpios: GPIO used to disable write protection of the flash
> > +
> > +  Example:
> > +	nand@70008000 {
> > +		compatible = "nvidia,tegra20-nand";
> > +		reg = <0x70008000 0x100>;
> > +		interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
> > +		clocks = <&tegra_car TEGRA20_CLK_NDFLASH>;
> > +		clock-names = "nand";
> > +		resets = <&tegra_car 13>;
> > +		reset-names = "nand";
> > +	};
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index ddb9ac8..972e31d 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -9459,6 +9459,12 @@ M:	Laxman Dewangan <ldewangan@nvidia.com>
> >  S:	Supported
> >  F:	drivers/input/keyboard/tegra-kbc.c
> >  
> > +TEGRA NAND DRIVER
> > +M:	Lucas Stach <dev@lynxeye.de>
> > +S:	Maintained
> > +F:	Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> > +F:	drivers/mtd/nand/tegra_nand.c
> > +
> >  TEGRA PWM DRIVER
> >  M:	Thierry Reding <thierry.reding@gmail.com>
> >  S:	Supported
> > diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
> > index 7d0150d..1eafd4e 100644
> > --- a/drivers/mtd/nand/Kconfig
> > +++ b/drivers/mtd/nand/Kconfig
> > @@ -524,4 +524,10 @@ config MTD_NAND_SUNXI
> >  	help
> >  	  Enables support for NAND Flash chips on Allwinner SoCs.
> >  
> > +config MTD_NAND_TEGRA
> > +	tristate "Support for NAND on NVIDIA Tegra"
> > +	depends on ARCH_TEGRA || COMPILE_TEST
> > +	help
> > +	  Enables support for NAND flash on NVIDIA Tegra SoC based boards.
> > +
> >  endif # MTD_NAND
> > diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
> > index bd38f21..58399ce 100644
> > --- a/drivers/mtd/nand/Makefile
> > +++ b/drivers/mtd/nand/Makefile
> > @@ -51,5 +51,6 @@ obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
> >  obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
> >  obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
> >  obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
> > +obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
> >  
> >  nand-objs := nand_base.o nand_bbt.o nand_timings.o
> > diff --git a/drivers/mtd/nand/tegra_nand.c b/drivers/mtd/nand/tegra_nand.c
> > new file mode 100644
> > index 0000000..b919a6e
> > --- /dev/null
> > +++ b/drivers/mtd/nand/tegra_nand.c
> > @@ -0,0 +1,794 @@
> > +/*
> > + * Copyright (C) 2014-2015 Lucas Stach <dev@lynxeye.de>
> > + * Copyright (C) 2012 Avionic Design GmbH
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/clk.h>
> > +#include <linux/completion.h>
> > +#include <linux/delay.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/err.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/module.h>
> > +#include <linux/mtd/nand.h>
> > +#include <linux/mtd/partitions.h>
> > +#include <linux/of_gpio.h>
> > +#include <linux/of_mtd.h>
> > +#include <linux/of.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/reset.h>
> > +
> > +#define CMD				0x00
> > +#define   CMD_GO			(1 << 31)
> > +#define   CMD_CLE			(1 << 30)
> > +#define   CMD_ALE			(1 << 29)
> > +#define   CMD_PIO			(1 << 28)
> > +#define   CMD_TX			(1 << 27)
> > +#define   CMD_RX			(1 << 26)
> 
> How about using BIT() ?
> 
> > +#define   CMD_SEC_CMD			(1 << 25)
> > +#define   CMD_AFT_DAT			(1 << 24)
> > +#define   CMD_TRANS_SIZE(x)		(((x) & 0xf) << 20)
> > +#define   CMD_A_VALID			(1 << 19)
> > +#define   CMD_B_VALID			(1 << 18)
> > +#define   CMD_RD_STATUS_CHK		(1 << 17)
> > +#define   CMD_RBSY_CHK			(1 << 16)
> > +#define   CMD_CE(x)			(1 << (8 + ((x) & 0x7)))
> > +#define   CMD_CLE_SIZE(x)		(((x) & 0x3) << 4)
> > +#define   CMD_ALE_SIZE(x)		(((x) & 0xf) << 0)
> > +
> > +#define STATUS				0x04
> > +
> > +#define ISR				0x08
> > +#define   ISR_UND			(1 << 7)
> > +#define   ISR_OVR			(1 << 6)
> > +#define   ISR_CMD_DONE			(1 << 5)
> > +#define   ISR_ECC_ERR			(1 << 4)
> > +
> > +#define IER				0x0c
> > +#define   IER_ERR_TRIG_VAL(x)		(((x) & 0xf) << 16)
> > +#define   IER_UND			(1 << 7)
> > +#define   IER_OVR			(1 << 6)
> > +#define   IER_CMD_DONE			(1 << 5)
> > +#define   IER_ECC_ERR			(1 << 4)
> > +#define   IER_GIE			(1 << 0)
> > +
> > +#define CFG				0x10
> > +#define   CFG_HW_ECC			(1 << 31)
> > +#define   CFG_ECC_SEL			(1 << 30)
> > +#define   CFG_ERR_COR			(1 << 29)
> > +#define   CFG_PIPE_EN			(1 << 28)
> > +#define   CFG_TVAL_4			(0 << 24)
> > +#define   CFG_TVAL_6			(1 << 24)
> > +#define   CFG_TVAL_8			(2 << 24)
> > +#define   CFG_SKIP_SPARE		(1 << 23)
> > +#define   CFG_BUS_WIDTH_8		(0 << 21)
> > +#define   CFG_BUS_WIDTH_16		(1 << 21)
> > +#define   CFG_COM_BSY			(1 << 20)
> > +#define   CFG_PS_256			(0 << 16)
> > +#define   CFG_PS_512			(1 << 16)
> > +#define   CFG_PS_1024			(2 << 16)
> > +#define   CFG_PS_2048			(3 << 16)
> > +#define   CFG_PS_4096			(4 << 16)
> > +#define   CFG_SKIP_SPARE_SIZE_4		(0 << 14)
> > +#define   CFG_SKIP_SPARE_SIZE_8		(1 << 14)
> > +#define   CFG_SKIP_SPARE_SIZE_12	(2 << 14)
> > +#define   CFG_SKIP_SPARE_SIZE_16	(3 << 14)
> > +#define   CFG_TAG_BYTE_SIZE(x)		((x) & 0xff)
> > +
> > +#define TIMING_1			0x14
> > +#define   TIMING_TRP_RESP(x)		(((x) & 0xf) << 28)
> > +#define   TIMING_TWB(x)			(((x) & 0xf) << 24)
> > +#define   TIMING_TCR_TAR_TRR(x)		(((x) & 0xf) << 20)
> > +#define   TIMING_TWHR(x)		(((x) & 0xf) << 16)
> > +#define   TIMING_TCS(x)			(((x) & 0xc) << 14)
> > +#define   TIMING_TWH(x)			(((x) & 0x3) << 12)
> > +#define   TIMING_TWP(x)			(((x) & 0xf) <<  8)
> > +#define   TIMING_TRH(x)			(((x) & 0xf) <<  4)
> > +#define   TIMING_TRP(x)			(((x) & 0xf) <<  0)
> > +
> > +#define RESP				0x18
> > +
> > +#define TIMING_2			0x1c
> > +#define   TIMING_TADL(x)		((x) & 0xf)
> > +
> > +#define CMD_1				0x20
> > +#define CMD_2				0x24
> > +#define ADDR_1				0x28
> > +#define ADDR_2				0x2c
> > +
> > +#define DMA_CTRL			0x30
> > +#define   DMA_CTRL_GO			(1 << 31)
> > +#define   DMA_CTRL_IN			(0 << 30)
> > +#define   DMA_CTRL_OUT			(1 << 30)
> > +#define   DMA_CTRL_PERF_EN		(1 << 29)
> > +#define   DMA_CTRL_IE_DONE		(1 << 28)
> > +#define   DMA_CTRL_REUSE		(1 << 27)
> > +#define   DMA_CTRL_BURST_1		(2 << 24)
> > +#define   DMA_CTRL_BURST_4		(3 << 24)
> > +#define   DMA_CTRL_BURST_8		(4 << 24)
> > +#define   DMA_CTRL_BURST_16		(5 << 24)
> > +#define   DMA_CTRL_IS_DONE		(1 << 20)
> > +#define   DMA_CTRL_EN_A			(1 <<  2)
> > +#define   DMA_CTRL_EN_B			(1 <<  1)
> > +
> > +#define DMA_CFG_A			0x34
> > +#define DMA_CFG_B			0x38
> > +
> > +#define FIFO_CTRL			0x3c
> > +#define   FIFO_CTRL_CLR_ALL		(1 << 3)
> > +
> > +#define DATA_PTR			0x40
> > +#define TAG_PTR				0x44
> > +#define ECC_PTR				0x48
> > +
> > +#define HWSTATUS_CMD			0x50
> > +#define HWSTATUS_MASK			0x54
> > +#define   HWSTATUS_RDSTATUS_MASK(x)	(((x) & 0xff) << 24)
> > +#define   HWSTATUS_RDSTATUS_VALUE(x)	(((x) & 0xff) << 16)
> > +#define   HWSTATUS_RBSY_MASK(x)		(((x) & 0xff) << 8)
> > +#define   HWSTATUS_RBSY_VALUE(x)	(((x) & 0xff) << 0)
> > +
> > +#define DEC_RESULT			0xd0
> > +#define   DEC_RESULT_CORRFAIL		(1 << 8)
> > +
> > +#define DEC_STATUS_BUF			0xd4
> > +#define   DEC_STATUS_BUF_FAIL_SEC_FLAG(x)	((x) & (0xff << 24))
> > +#define   DEC_STATUS_BUF_CORR_SEC_FLAG(x)	((x) & (0xff << 16))
> > +#define   DEC_STATUS_BUF_MAX_CORR_CNT(x)	(((x) & 0xf00) >> 8)
> > +
> > +struct tegra_nand {
> > +	void __iomem *regs;
> > +	int irq;
> 
> Seems like you don't need to store irq.
> 
> > +	struct clk *clk;
> > +	struct reset_control *rst;
> > +	int wp_gpio;
> > +	int buswidth;
> 
> And also you don't seem to need either wp_gpio or buswidth stored
> in the struct. You only use them at probe time.
> 

I'll keep the wp_gpio, as I still hope to use this to WP the NAND when
no write is pending. I'll fix the others.

> > +
> > +	struct nand_chip chip;
> > +	struct mtd_info mtd;
> > +	struct device *dev;
> > +
> > +	struct completion command_complete;
> > +	struct completion dma_complete;
> > +
> > +	dma_addr_t data_dma;
> > +	void *data_buf;
> > +	dma_addr_t oob_dma;
> > +	void *oob_buf;
> > +
> > +	int cur_chip;
> > +};

[...]

> > +static int tegra_nand_parse_dt(struct device_node *node,
> > +			       struct tegra_nand *nand)
> > +{
> > +	enum of_gpio_flags flags;
> > +
> > +	nand->wp_gpio = of_get_named_gpio_flags(node, "nvidia,wp-gpios", 0,
> > +						 &flags);
> > +	if (nand->wp_gpio < 0)
> > +		nand->wp_gpio = 0;
> > +
> > +	nand->buswidth = of_get_nand_bus_width(node);
> > +	if (nand->buswidth < 0)
> > +		return nand->buswidth;
> > +
> 
> I believe you should set NAND_BUSWIDTH_16 flag in nand_chip.options,
> before calling nand_scan_ident?
> 
> Also, if you just access the of_get_nand_bus_width before nand_scan_ident
> you can drop the nand->buswidth field.
> 
> Same goes for wp_gpio.
> 

Right, will do.

> > +	return 0;
> > +}
> > +
[...]

> > +
> > +static const struct of_device_id tegra_nand_of_match[] = {
> > +	{ .compatible = "nvidia,tegra20-nand" },
> > +	{ .compatible = "nvidia,tegra30-nand" },
> 
> AFAIK, having two compatible strings, but making no distinction between
> them is typically frowned upon by devicetree maintainers.
> 
> Is the controller any different in tegra20 and tegra30?
> 
> If you are not sure about the controllers being different, you can
> try the following approach. The devicetree is written like this:
> 
> nand@foo {
>    compatible = "nvidia,tegra20-nand", "nvidia,tegra-nand";
> };
> 
> So you only deal with "nvidia,tegra-nand" in the driver, yet the
> devicetree files are prepared to deal with a difference.
> 

I believe that tegra30-nand is actually a bit different from tegra20 (at
least one more clock that I know about), but obviously this driver doesn't
handle those differences and I don't know if I will ever get to see Tegra30
hardware with NAND. Given that, I think it's best to just remove the
tegra30-nand compatible for now and add it back if someone has hardware
to test with.

Regards,
Lucas

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Jan. 7, 2015, 1:45 p.m. UTC | #5
On Wed, Jan 07, 2015 at 01:24:38AM +0100, Lucas Stach wrote:
> Am Dienstag, den 06.01.2015, 15:27 -0300 schrieb Ezequiel Garcia:
> > On 01/04/2015 05:39 PM, Lucas Stach wrote:
[...]
> > > diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
> > > index 7d0150d..1eafd4e 100644
> > > --- a/drivers/mtd/nand/Kconfig
> > > +++ b/drivers/mtd/nand/Kconfig
> > > @@ -524,4 +524,10 @@ config MTD_NAND_SUNXI
> > >  	help
> > >  	  Enables support for NAND Flash chips on Allwinner SoCs.
> > >  
> > > +config MTD_NAND_TEGRA
> > > +	tristate "Support for NAND on NVIDIA Tegra"
> > > +	depends on ARCH_TEGRA || COMPILE_TEST

I think you're going to need a bunch more dependencies if you use
COMPILE_TEST. Otherwise we're going to get all kinds of build failure
reports.

> > > diff --git a/drivers/mtd/nand/tegra_nand.c b/drivers/mtd/nand/tegra_nand.c
[...]
> > > +struct tegra_nand {
> > > +	void __iomem *regs;
> > > +	int irq;
> > 
> > Seems like you don't need to store irq.
> > 
> > > +	struct clk *clk;
> > > +	struct reset_control *rst;
> > > +	int wp_gpio;
> > > +	int buswidth;
> > 
> > And also you don't seem to need either wp_gpio or buswidth stored
> > in the struct. You only use them at probe time.
> > 
> 
> I'll keep the wp_gpio, as I still hope to use this to WP the NAND when
> no write is pending. I'll fix the others.

Maybe use the gpiod_*() API since the old one is now deprecated?

> > > +static const struct of_device_id tegra_nand_of_match[] = {
> > > +	{ .compatible = "nvidia,tegra20-nand" },
> > > +	{ .compatible = "nvidia,tegra30-nand" },
> > 
> > AFAIK, having two compatible strings, but making no distinction between
> > them is typically frowned upon by devicetree maintainers.
> > 
> > Is the controller any different in tegra20 and tegra30?
> > 
> > If you are not sure about the controllers being different, you can
> > try the following approach. The devicetree is written like this:
> > 
> > nand@foo {
> >    compatible = "nvidia,tegra20-nand", "nvidia,tegra-nand";
> > };
> > 
> > So you only deal with "nvidia,tegra-nand" in the driver, yet the
> > devicetree files are prepared to deal with a difference.

I think it's been more common to have something like this:

	tegra20.dtsi:

		nand-controller@70008000 {
			compatible = "nvidia,tegra20-nand";
			...
		};

	tegra30.dtsi:

		nand-controller@70008000 {
			compatible = "nvidia,tegra30-nand", "nvidia,tegra20-nand";
			...
		};

The idea being that if the Tegra30 variant is indeed compatible with the
Tegra20 variant, the driver can match on "nvidia,tegra20-nand". But at
the same time the DTB has the more specific compatible in case the
driver ever needs to handle generation-specific quirks, or implement any
additional functionality added in Tegra30 that wasn't available in early
generations.

> I believe that tegra30-nand is actually a bit different from tegra20 (at
> least one more clock I know about), but obviously this driver doesn't
> handle those differences and I don't know if I ever get to see Tegra30
> hardware with NAND. Given that I think it's best to just remove the
> tegra30-nand compatible for now and add it back if someone has hardware
> to test with.

Yes, that sounds like the best option for now.

Thierry
Boris Brezillon Jan. 10, 2015, 5:35 p.m. UTC | #6
Hi Lucas,

Have you tried running the mtd tests on your driver? If you did, can you
give the results in a cover letter? If you didn't, can you run
them?

On Sun,  4 Jan 2015 21:39:17 +0100
Lucas Stach <dev@lynxeye.de> wrote:

> Add support for the NAND flash controller found on NVIDIA
> Tegra 2/3 SoCs. This is a largely reworked version of the driver
> started by Thierry.
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> ---
> I've tested this driver with the in-kernel mtd-tests and some
> realworld workloads on a Colibri T20 module.
> ---
>  .../bindings/mtd/nvidia,tegra20-nand.txt           |  30 +
>  MAINTAINERS                                        |   6 +
>  drivers/mtd/nand/Kconfig                           |   6 +
>  drivers/mtd/nand/Makefile                          |   1 +
>  drivers/mtd/nand/tegra_nand.c                      | 794 +++++++++++++++++++++
>  5 files changed, 837 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
>  create mode 100644 drivers/mtd/nand/tegra_nand.c
> 
> diff --git a/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> new file mode 100644
> index 0000000..088223c
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> @@ -0,0 +1,30 @@
> +NVIDIA Tegra NAND Flash controller
> +
> +Required properties:
> +- compatible: Must be one of:
> +  - "nvidia,tegra20-nand"
> +  - "nvidia,tegra30-nand"
> +- reg: MMIO address range
> +- interrupts: interrupt output of the NFC controller
> +- clocks: Must contain an entry for each entry in clock-names.
> +  See ../clocks/clock-bindings.txt for details.
> +- clock-names: Must include the following entries:
> +  - nand
> +- resets: Must contain an entry for each entry in reset-names.
> +  See ../reset/reset.txt for details.
> +- reset-names: Must include the following entries:
> +  - nand
> +
> +Optional properties:
> +- nvidia,wp-gpios: GPIO used to disable write protection of the flash
> +
> +  Example:
> +	nand@70008000 {
> +		compatible = "nvidia,tegra20-nand";
> +		reg = <0x70008000 0x100>;
> +		interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
> +		clocks = <&tegra_car TEGRA20_CLK_NDFLASH>;
> +		clock-names = "nand";
> +		resets = <&tegra_car 13>;
> +		reset-names = "nand";
> +	};

According to the CMD_CE macro, your NAND controller seems to support
multiple chips. If this is the case, maybe you should represent it with
one nand-controller node and nand chips as children of this controller
(see the sunxi controller binding [1]).

BTW, not that I really care :-), but I thought DT bindings had to be
submitted in their own patch.

> diff --git a/MAINTAINERS b/MAINTAINERS
> index ddb9ac8..972e31d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9459,6 +9459,12 @@ M:	Laxman Dewangan <ldewangan@nvidia.com>
>  S:	Supported
>  F:	drivers/input/keyboard/tegra-kbc.c
>  
> +TEGRA NAND DRIVER
> +M:	Lucas Stach <dev@lynxeye.de>
> +S:	Maintained
> +F:	Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
> +F:	drivers/mtd/nand/tegra_nand.c
> +
>  TEGRA PWM DRIVER
>  M:	Thierry Reding <thierry.reding@gmail.com>
>  S:	Supported
> diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
> index 7d0150d..1eafd4e 100644
> --- a/drivers/mtd/nand/Kconfig
> +++ b/drivers/mtd/nand/Kconfig
> @@ -524,4 +524,10 @@ config MTD_NAND_SUNXI
>  	help
>  	  Enables support for NAND Flash chips on Allwinner SoCs.
>  
> +config MTD_NAND_TEGRA
> +	tristate "Support for NAND on NVIDIA Tegra"
> +	depends on ARCH_TEGRA || COMPILE_TEST
> +	help
> +	  Enables support for NAND flash on NVIDIA Tegra SoC based boards.
> +
>  endif # MTD_NAND
> diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
> index bd38f21..58399ce 100644
> --- a/drivers/mtd/nand/Makefile
> +++ b/drivers/mtd/nand/Makefile
> @@ -51,5 +51,6 @@ obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
>  obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
>  obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
>  obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
> +obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
>  
>  nand-objs := nand_base.o nand_bbt.o nand_timings.o
> diff --git a/drivers/mtd/nand/tegra_nand.c b/drivers/mtd/nand/tegra_nand.c
> new file mode 100644
> index 0000000..b919a6e
> --- /dev/null
> +++ b/drivers/mtd/nand/tegra_nand.c
> @@ -0,0 +1,794 @@
> +/*
> + * Copyright (C) 2014-2015 Lucas Stach <dev@lynxeye.de>
> + * Copyright (C) 2012 Avionic Design GmbH
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/completion.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/err.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/module.h>
> +#include <linux/mtd/nand.h>
> +#include <linux/mtd/partitions.h>
> +#include <linux/of_gpio.h>
> +#include <linux/of_mtd.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/reset.h>
> +
> +#define CMD				0x00
> +#define   CMD_GO			(1 << 31)
> +#define   CMD_CLE			(1 << 30)
> +#define   CMD_ALE			(1 << 29)
> +#define   CMD_PIO			(1 << 28)
> +#define   CMD_TX			(1 << 27)
> +#define   CMD_RX			(1 << 26)
> +#define   CMD_SEC_CMD			(1 << 25)
> +#define   CMD_AFT_DAT			(1 << 24)
> +#define   CMD_TRANS_SIZE(x)		(((x) & 0xf) << 20)
> +#define   CMD_A_VALID			(1 << 19)
> +#define   CMD_B_VALID			(1 << 18)
> +#define   CMD_RD_STATUS_CHK		(1 << 17)
> +#define   CMD_RBSY_CHK			(1 << 16)
> +#define   CMD_CE(x)			(1 << (8 + ((x) & 0x7)))
> +#define   CMD_CLE_SIZE(x)		(((x) & 0x3) << 4)
> +#define   CMD_ALE_SIZE(x)		(((x) & 0xf) << 0)
> +
> +#define STATUS				0x04
> +
> +#define ISR				0x08
> +#define   ISR_UND			(1 << 7)
> +#define   ISR_OVR			(1 << 6)
> +#define   ISR_CMD_DONE			(1 << 5)
> +#define   ISR_ECC_ERR			(1 << 4)
> +
> +#define IER				0x0c
> +#define   IER_ERR_TRIG_VAL(x)		(((x) & 0xf) << 16)
> +#define   IER_UND			(1 << 7)
> +#define   IER_OVR			(1 << 6)
> +#define   IER_CMD_DONE			(1 << 5)
> +#define   IER_ECC_ERR			(1 << 4)
> +#define   IER_GIE			(1 << 0)
> +
> +#define CFG				0x10
> +#define   CFG_HW_ECC			(1 << 31)
> +#define   CFG_ECC_SEL			(1 << 30)
> +#define   CFG_ERR_COR			(1 << 29)
> +#define   CFG_PIPE_EN			(1 << 28)
> +#define   CFG_TVAL_4			(0 << 24)
> +#define   CFG_TVAL_6			(1 << 24)
> +#define   CFG_TVAL_8			(2 << 24)
> +#define   CFG_SKIP_SPARE		(1 << 23)
> +#define   CFG_BUS_WIDTH_8		(0 << 21)
> +#define   CFG_BUS_WIDTH_16		(1 << 21)
> +#define   CFG_COM_BSY			(1 << 20)
> +#define   CFG_PS_256			(0 << 16)
> +#define   CFG_PS_512			(1 << 16)
> +#define   CFG_PS_1024			(2 << 16)
> +#define   CFG_PS_2048			(3 << 16)
> +#define   CFG_PS_4096			(4 << 16)
> +#define   CFG_SKIP_SPARE_SIZE_4		(0 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_8		(1 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_12	(2 << 14)
> +#define   CFG_SKIP_SPARE_SIZE_16	(3 << 14)
> +#define   CFG_TAG_BYTE_SIZE(x)		((x) & 0xff)
> +
> +#define TIMING_1			0x14
> +#define   TIMING_TRP_RESP(x)		(((x) & 0xf) << 28)
> +#define   TIMING_TWB(x)			(((x) & 0xf) << 24)
> +#define   TIMING_TCR_TAR_TRR(x)		(((x) & 0xf) << 20)
> +#define   TIMING_TWHR(x)		(((x) & 0xf) << 16)
> +#define   TIMING_TCS(x)			(((x) & 0xc) << 14)
> +#define   TIMING_TWH(x)			(((x) & 0x3) << 12)
> +#define   TIMING_TWP(x)			(((x) & 0xf) <<  8)
> +#define   TIMING_TRH(x)			(((x) & 0xf) <<  4)
> +#define   TIMING_TRP(x)			(((x) & 0xf) <<  0)
> +
> +#define RESP				0x18
> +
> +#define TIMING_2			0x1c
> +#define   TIMING_TADL(x)		((x) & 0xf)
> +
> +#define CMD_1				0x20
> +#define CMD_2				0x24
> +#define ADDR_1				0x28
> +#define ADDR_2				0x2c
> +
> +#define DMA_CTRL			0x30
> +#define   DMA_CTRL_GO			(1 << 31)
> +#define   DMA_CTRL_IN			(0 << 30)
> +#define   DMA_CTRL_OUT			(1 << 30)
> +#define   DMA_CTRL_PERF_EN		(1 << 29)
> +#define   DMA_CTRL_IE_DONE		(1 << 28)
> +#define   DMA_CTRL_REUSE		(1 << 27)
> +#define   DMA_CTRL_BURST_1		(2 << 24)
> +#define   DMA_CTRL_BURST_4		(3 << 24)
> +#define   DMA_CTRL_BURST_8		(4 << 24)
> +#define   DMA_CTRL_BURST_16		(5 << 24)
> +#define   DMA_CTRL_IS_DONE		(1 << 20)
> +#define   DMA_CTRL_EN_A			(1 <<  2)
> +#define   DMA_CTRL_EN_B			(1 <<  1)
> +
> +#define DMA_CFG_A			0x34
> +#define DMA_CFG_B			0x38
> +
> +#define FIFO_CTRL			0x3c
> +#define   FIFO_CTRL_CLR_ALL		(1 << 3)
> +
> +#define DATA_PTR			0x40
> +#define TAG_PTR				0x44
> +#define ECC_PTR				0x48
> +
> +#define HWSTATUS_CMD			0x50
> +#define HWSTATUS_MASK			0x54
> +#define   HWSTATUS_RDSTATUS_MASK(x)	(((x) & 0xff) << 24)
> +#define   HWSTATUS_RDSTATUS_VALUE(x)	(((x) & 0xff) << 16)
> +#define   HWSTATUS_RBSY_MASK(x)		(((x) & 0xff) << 8)
> +#define   HWSTATUS_RBSY_VALUE(x)	(((x) & 0xff) << 0)
> +
> +#define DEC_RESULT			0xd0
> +#define   DEC_RESULT_CORRFAIL		(1 << 8)
> +
> +#define DEC_STATUS_BUF			0xd4
> +#define   DEC_STATUS_BUF_FAIL_SEC_FLAG(x)	((x) & (0xff << 24))
> +#define   DEC_STATUS_BUF_CORR_SEC_FLAG(x)	((x) & (0xff << 16))
> +#define   DEC_STATUS_BUF_MAX_CORR_CNT(x)	(((x) & 0xf00) >> 8)
> +
> +struct tegra_nand {
> +	void __iomem *regs;
> +	int irq;
> +	struct clk *clk;
> +	struct reset_control *rst;
> +	int wp_gpio;
> +	int buswidth;
> +
> +	struct nand_chip chip;
> +	struct mtd_info mtd;
> +	struct device *dev;
> +
> +	struct completion command_complete;
> +	struct completion dma_complete;
> +
> +	dma_addr_t data_dma;
> +	void *data_buf;
> +	dma_addr_t oob_dma;
> +	void *oob_buf;
> +
> +	int cur_chip;
> +};
> +
> +static inline struct tegra_nand *to_tegra_nand(struct mtd_info *mtd)
> +{
> +	return container_of(mtd, struct tegra_nand, mtd);
> +}
> +
> +static struct nand_ecclayout tegra_nand_oob_16 = {
> +	.eccbytes = 4,
> +	.eccpos = { 3, 4, 5, 6 },
> +	.oobfree = {
> +		{ .offset = 8, . length = 8 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_64 = {
> +	.eccbytes = 36,
> +	.eccpos = {
> +		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
> +		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
> +		35, 36, 37, 38, 39
> +	},
> +	.oobfree = {
> +		{ .offset = 40, .length = 20 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_128 = {
> +	.eccbytes = 72,
> +	.eccpos = {
> +		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
> +		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
> +		35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
> +		51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
> +		67, 68, 69, 70, 71, 72, 73, 74, 75
> +	},
> +	.oobfree = {
> +		{ .offset = 76, .length = 52 }
> +	}
> +};
> +
> +static struct nand_ecclayout tegra_nand_oob_224 = {
> +	.eccbytes = 144,
> +	.eccpos = {
> +		  3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +		 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
> +		 27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
> +		 39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,
> +		 51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,
> +		 63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
> +		 75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,
> +		 87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,
> +		 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
> +		111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
> +		123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
> +		135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
> +		147
> +	},
> +	.oobfree = {
> +		{ .offset = 148, .length = 76 }
> +	}
> +};

If you want to support as many NAND references as possible (I mean
those with oob size > 224), maybe these layouts should be dynamically
defined (based on the chosen ECC strength and step size).
See what's done in nand_bch.c.

> +
> +static irqreturn_t tegra_nand_irq(int irq, void *data)
> +{
> +	struct tegra_nand *nand = data;
> +	irqreturn_t ret = IRQ_HANDLED;
> +	u32 isr, dma;
> +
> +	isr = readl(nand->regs + ISR);
> +	dma = readl(nand->regs + DMA_CTRL);
> +
> +	if (!isr && !(dma & DMA_CTRL_IS_DONE)) {
> +		ret = IRQ_NONE;
> +		goto out;
> +	}
> +
> +	if (isr & ISR_CMD_DONE)
> +		complete(&nand->command_complete);
> +
> +	if (isr & ISR_UND)
> +		dev_dbg(nand->dev, "  FIFO underrun\n");
> +
> +	if (isr & ISR_OVR)
> +		dev_dbg(nand->dev, "  FIFO overrun\n");
> +
> +	/* handle DMA interrupts */
> +	if (dma & DMA_CTRL_IS_DONE) {
> +		writel(dma, nand->regs + DMA_CTRL);
> +		complete(&nand->dma_complete);
> +	}
> +
> +	/* clear interrupts */
> +	writel(isr, nand->regs + ISR);
> +
> +out:
> +	return ret;
> +}
> +
> +static void tegra_nand_command(struct mtd_info *mtd, unsigned int command,
> +			       int column, int page_addr)
> +{
> +	struct tegra_nand *nand = to_tegra_nand(mtd);
> +	u32 value;
> +
> +	switch (command) {
> +	case NAND_CMD_READOOB:
> +		column += mtd->writesize;
> +		/* fall-through */
> +
> +	case NAND_CMD_READ0:
> +		writel(NAND_CMD_READ0, nand->regs + CMD_1);
> +		writel(NAND_CMD_READSTART, nand->regs + CMD_2);
> +
> +		value = (page_addr << 16) | (column & 0xffff);
> +		writel(value, nand->regs + ADDR_1);
> +
> +		value = page_addr >> 16;
> +		writel(value, nand->regs + ADDR_2);
> +
> +		value = CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4) | CMD_SEC_CMD |
> +			CMD_RBSY_CHK | CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_SEQIN:
> +		writel(NAND_CMD_SEQIN, nand->regs + CMD_1);
> +
> +		value = (page_addr << 16) | (column & 0xffff);
> +		writel(value, nand->regs + ADDR_1);
> +
> +		value = page_addr >> 16;
> +		writel(value, nand->regs + ADDR_2);
> +
> +		value = CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4) |
> +			CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_PAGEPROG:
> +		writel(NAND_CMD_PAGEPROG, nand->regs + CMD_1);
> +
> +		value = CMD_CLE | CMD_CE(nand->cur_chip) | CMD_GO;
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_READID:
> +		writel(NAND_CMD_READID, nand->regs + CMD_1);
> +		writel(column & 0xff, nand->regs + ADDR_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_ERASE1:
> +		writel(NAND_CMD_ERASE1, nand->regs + CMD_1);
> +		writel(NAND_CMD_ERASE2, nand->regs + CMD_2);
> +		writel(page_addr, nand->regs + ADDR_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_ALE_SIZE(2) |
> +			CMD_SEC_CMD | CMD_RBSY_CHK | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_ERASE2:
> +		return;
> +
> +	case NAND_CMD_STATUS:
> +		writel(NAND_CMD_STATUS, nand->regs + CMD_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_PARAM:
> +		writel(NAND_CMD_PARAM, nand->regs + CMD_1);
> +		writel(column & 0xff, nand->regs + ADDR_1);
> +		value = CMD_GO | CMD_CLE | CMD_ALE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	case NAND_CMD_RESET:
> +		writel(NAND_CMD_RESET, nand->regs + CMD_1);
> +
> +		value = CMD_GO | CMD_CLE | CMD_CE(nand->cur_chip);
> +		writel(value, nand->regs + CMD);
> +		break;
> +
> +	default:
> +		dev_warn(nand->dev, "unsupported command: %x\n", command);
> +		return;
> +	}
> +
> +	wait_for_completion(&nand->command_complete);
> +}

Have you tried defining cmd_ctrl and dev_ready instead of
reimplementing the nand_command function (already provided by
nand_base.c) ?
Here is a quick rework [2] (not sure this can work though).

That's all I got for now.

By the way, if you plan to support addressing multiple NAND chips with
this controller you might want to take a look at the sunxi_nand driver
[3].

Best Regards,

Boris

[1]https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/mtd/sunxi-nand.txt?id=refs/tags/v3.19-rc3
[2]http://code.bulix.org/tljno2-87698
[3]https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/drivers/mtd/nand/sunxi_nand.c?id=refs/tags/v3.19-rc3
Boris Brezillon Jan. 10, 2015, 6:20 p.m. UTC | #7
On Sun,  4 Jan 2015 21:39:17 +0100
Lucas Stach <dev@lynxeye.de> wrote:

> Add support for the NAND flash controller found on NVIDIA
> Tegra 2/3 SoCs. This is a largely reworked version of the driver
> started by Thierry.
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> ---
> I've tested this driver with the in-kernel mtd-tests and some
> realworld workloads on a Colibri T20 module.
> ---
>  .../bindings/mtd/nvidia,tegra20-nand.txt           |  30 +
>  MAINTAINERS                                        |   6 +
>  drivers/mtd/nand/Kconfig                           |   6 +
>  drivers/mtd/nand/Makefile                          |   1 +
>  drivers/mtd/nand/tegra_nand.c                      | 794 +++++++++++++++++++++
>  5 files changed, 837 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
>  create mode 100644 drivers/mtd/nand/tegra_nand.c
> 

[...]

> +
> +static const char * const part_probes[] = {
> +	"cmdlinepart", "ofpart", NULL };

Where is part_probes referenced in this driver?

> +
> +static int tegra_nand_probe(struct platform_device *pdev)
> +{
> +	struct tegra_nand *nand;
> +	struct nand_chip *chip;
> +	struct mtd_info *mtd;
> +	struct resource *res;
> +	unsigned long value;
> +	int err = 0;
> +
> +	nand = devm_kzalloc(&pdev->dev, sizeof(*nand), GFP_KERNEL);
> +	if (!nand)
> +		return -ENOMEM;
> +
> +	nand->dev = &pdev->dev;
> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	nand->regs = devm_ioremap_resource(&pdev->dev, res);
> +	if (IS_ERR(nand->regs))
> +		return PTR_ERR(nand->regs);
> +
> +	nand->irq  = platform_get_irq(pdev, 0);
> +	err = devm_request_irq(&pdev->dev, nand->irq, tegra_nand_irq, 0,
> +			       dev_name(&pdev->dev), nand);
> +	if (err)
> +		return err;
> +
> +	nand->rst = devm_reset_control_get(&pdev->dev, "nand");
> +	if (IS_ERR(nand->rst))
> +		return PTR_ERR(nand->rst);
> +
> +	nand->clk = devm_clk_get(&pdev->dev, "nand");
> +	if (IS_ERR(nand->clk))
> +		return PTR_ERR(nand->clk);
> +
> +	err = tegra_nand_parse_dt(pdev->dev.of_node, nand);
> +	if (err)
> +		return err;
> +
> +	err = clk_prepare_enable(nand->clk);
> +	if (err)
> +		return err;
> +
> +	reset_control_assert(nand->rst);
> +	udelay(2);
> +	reset_control_deassert(nand->rst);
> +
> +	if (gpio_is_valid(nand->wp_gpio)) {
> +		err = devm_gpio_request_one(&pdev->dev, nand->wp_gpio,
> +				GPIOF_OUT_INIT_HIGH, "tegra-nand-wp");
> +		if (err)
> +			return err;
> +	}
> +
> +	value = HWSTATUS_RDSTATUS_MASK(1) | HWSTATUS_RDSTATUS_VALUE(0) |
> +		HWSTATUS_RBSY_MASK(NAND_STATUS_READY) |
> +		HWSTATUS_RBSY_VALUE(NAND_STATUS_READY);
> +	writel(NAND_CMD_STATUS, nand->regs + HWSTATUS_CMD);
> +	writel(value, nand->regs + HWSTATUS_MASK);
> +
> +	init_completion(&nand->command_complete);
> +	init_completion(&nand->dma_complete);
> +
> +	mtd = &nand->mtd;
> +	mtd->name = dev_name(&pdev->dev);
> +	mtd->owner = THIS_MODULE;
> +	mtd->priv = &nand->chip;
> +
> +	mtd->type = MTD_NANDFLASH;
> +	mtd->flags = MTD_CAP_NANDFLASH;
> +
> +	/* clear interrupts */
> +	value = readl(nand->regs + ISR);
> +	writel(value, nand->regs + ISR);
> +
> +	writel(DMA_CTRL_IS_DONE, nand->regs + DMA_CTRL);
> +
> +	/* enable interrupts */
> +	value = IER_UND | IER_OVR | IER_CMD_DONE | IER_ECC_ERR | IER_GIE;
> +	writel(value, nand->regs + IER);
> +
> +	chip = &nand->chip;
> +	chip->cmdfunc = tegra_nand_command;
> +	chip->select_chip = tegra_nand_select_chip;
> +	chip->read_byte = tegra_nand_read_byte;
> +	chip->read_buf = tegra_nand_read_buf;
> +	chip->write_buf = tegra_nand_write_buf;
> +
> +	tegra_nand_setup_timing(nand, 0);
> +
> +	err = nand_scan_ident(mtd, 1, NULL);
> +	if (err)
> +		return err;
> +
> +	nand->data_buf = dmam_alloc_coherent(&pdev->dev, mtd->writesize,
> +					    &nand->data_dma, GFP_KERNEL);
> +	if (!nand->data_buf)
> +		return -ENOMEM;
> +
> +	nand->oob_buf = dmam_alloc_coherent(&pdev->dev, mtd->oobsize,
> +					    &nand->oob_dma, GFP_KERNEL);
> +	if (!nand->oob_buf)
> +		return -ENOMEM;
> +
> +	chip->ecc.mode = NAND_ECC_HW;
> +	chip->ecc.size = 512;
> +	chip->ecc.bytes = mtd->oobsize;
> +	chip->ecc.read_page = tegra_nand_read_page;
> +	chip->ecc.write_page = tegra_nand_write_page;

Just a nit, but I would rename those read/write_page functions into
tegra_nand_hwecc_xxx_page to clearly state that HW ECC is involved
here.

> +
> +	value = CFG_HW_ECC | CFG_ECC_SEL | CFG_ERR_COR | CFG_PIPE_EN |
> +		CFG_TVAL_8 | CFG_SKIP_SPARE | CFG_SKIP_SPARE_SIZE_4;

Can you move the CFG_HW_ECC flags setting into tegra_nand_read_page
(setting them at entry and clearing them at exit)?
This is really important to be able to access the NAND in raw mode
(i.e. without involving ECC), and you seem to force HW ECC for
all accesses, which means ecc.read/write_page_raw are probably not
working correctly.
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
new file mode 100644
index 0000000..088223c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
@@ -0,0 +1,30 @@ 
+NVIDIA Tegra NAND Flash controller
+
+Required properties:
+- compatible: Must be one of:
+  - "nvidia,tegra20-nand"
+  - "nvidia,tegra30-nand"
+- reg: MMIO address range
+- interrupts: interrupt output of the NFC controller
+- clocks: Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+  - nand
+- resets: Must contain an entry for each entry in reset-names.
+  See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+  - nand
+
+Optional properties:
+- nvidia,wp-gpios: GPIO used to disable write protection of the flash
+
+  Example:
+	nand@70008000 {
+		compatible = "nvidia,tegra20-nand";
+		reg = <0x70008000 0x100>;
+		interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&tegra_car TEGRA20_CLK_NDFLASH>;
+		clock-names = "nand";
+		resets = <&tegra_car 13>;
+		reset-names = "nand";
+	};
diff --git a/MAINTAINERS b/MAINTAINERS
index ddb9ac8..972e31d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9459,6 +9459,12 @@  M:	Laxman Dewangan <ldewangan@nvidia.com>
 S:	Supported
 F:	drivers/input/keyboard/tegra-kbc.c
 
+TEGRA NAND DRIVER
+M:	Lucas Stach <dev@lynxeye.de>
+S:	Maintained
+F:	Documentation/devicetree/bindings/mtd/nvidia,tegra20-nand.txt
+F:	drivers/mtd/nand/tegra_nand.c
+
 TEGRA PWM DRIVER
 M:	Thierry Reding <thierry.reding@gmail.com>
 S:	Supported
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 7d0150d..1eafd4e 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -524,4 +524,10 @@  config MTD_NAND_SUNXI
 	help
 	  Enables support for NAND Flash chips on Allwinner SoCs.
 
+config MTD_NAND_TEGRA
+	tristate "Support for NAND on NVIDIA Tegra"
+	depends on ARCH_TEGRA || COMPILE_TEST
+	help
+	  Enables support for NAND flash on NVIDIA Tegra SoC based boards.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index bd38f21..58399ce 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -51,5 +51,6 @@  obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
 obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
 obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
 obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
+obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o
diff --git a/drivers/mtd/nand/tegra_nand.c b/drivers/mtd/nand/tegra_nand.c
new file mode 100644
index 0000000..b919a6e
--- /dev/null
+++ b/drivers/mtd/nand/tegra_nand.c
@@ -0,0 +1,794 @@ 
+/*
+ * Copyright (C) 2014-2015 Lucas Stach <dev@lynxeye.de>
+ * Copyright (C) 2012 Avionic Design GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/of_gpio.h>
+#include <linux/of_mtd.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#define CMD				0x00
+#define   CMD_GO			(1 << 31)
+#define   CMD_CLE			(1 << 30)
+#define   CMD_ALE			(1 << 29)
+#define   CMD_PIO			(1 << 28)
+#define   CMD_TX			(1 << 27)
+#define   CMD_RX			(1 << 26)
+#define   CMD_SEC_CMD			(1 << 25)
+#define   CMD_AFT_DAT			(1 << 24)
+#define   CMD_TRANS_SIZE(x)		(((x) & 0xf) << 20)
+#define   CMD_A_VALID			(1 << 19)
+#define   CMD_B_VALID			(1 << 18)
+#define   CMD_RD_STATUS_CHK		(1 << 17)
+#define   CMD_RBSY_CHK			(1 << 16)
+#define   CMD_CE(x)			(1 << (8 + ((x) & 0x7)))
+#define   CMD_CLE_SIZE(x)		(((x) & 0x3) << 4)
+#define   CMD_ALE_SIZE(x)		(((x) & 0xf) << 0)
+
+#define STATUS				0x04
+
+#define ISR				0x08
+#define   ISR_UND			(1 << 7)
+#define   ISR_OVR			(1 << 6)
+#define   ISR_CMD_DONE			(1 << 5)
+#define   ISR_ECC_ERR			(1 << 4)
+
+#define IER				0x0c
+#define   IER_ERR_TRIG_VAL(x)		(((x) & 0xf) << 16)
+#define   IER_UND			(1 << 7)
+#define   IER_OVR			(1 << 6)
+#define   IER_CMD_DONE			(1 << 5)
+#define   IER_ECC_ERR			(1 << 4)
+#define   IER_GIE			(1 << 0)
+
+#define CFG				0x10
+#define   CFG_HW_ECC			(1 << 31)
+#define   CFG_ECC_SEL			(1 << 30)
+#define   CFG_ERR_COR			(1 << 29)
+#define   CFG_PIPE_EN			(1 << 28)
+#define   CFG_TVAL_4			(0 << 24)
+#define   CFG_TVAL_6			(1 << 24)
+#define   CFG_TVAL_8			(2 << 24)
+#define   CFG_SKIP_SPARE		(1 << 23)
+#define   CFG_BUS_WIDTH_8		(0 << 21)
+#define   CFG_BUS_WIDTH_16		(1 << 21)
+#define   CFG_COM_BSY			(1 << 20)
+#define   CFG_PS_256			(0 << 16)
+#define   CFG_PS_512			(1 << 16)
+#define   CFG_PS_1024			(2 << 16)
+#define   CFG_PS_2048			(3 << 16)
+#define   CFG_PS_4096			(4 << 16)
+#define   CFG_SKIP_SPARE_SIZE_4		(0 << 14)
+#define   CFG_SKIP_SPARE_SIZE_8		(1 << 14)
+#define   CFG_SKIP_SPARE_SIZE_12	(2 << 14)
+#define   CFG_SKIP_SPARE_SIZE_16	(3 << 14)
+#define   CFG_TAG_BYTE_SIZE(x)		((x) & 0xff)
+
+/*
+ * TIMING_1 packs the bus timing fields back to back.  TCS is the two-bit
+ * field in bits 15:14, between TWH (bits 13:12) and TWHR (bits 19:16), so
+ * its value must be masked with 0x3 before shifting; a 0xc mask would drop
+ * the low bits and spill into TWHR.
+ */
+#define TIMING_1			0x14
+#define   TIMING_TRP_RESP(x)		(((x) & 0xf) << 28)
+#define   TIMING_TWB(x)			(((x) & 0xf) << 24)
+#define   TIMING_TCR_TAR_TRR(x)		(((x) & 0xf) << 20)
+#define   TIMING_TWHR(x)		(((x) & 0xf) << 16)
+#define   TIMING_TCS(x)			(((x) & 0x3) << 14)
+#define   TIMING_TWH(x)			(((x) & 0x3) << 12)
+#define   TIMING_TWP(x)			(((x) & 0xf) <<  8)
+#define   TIMING_TRH(x)			(((x) & 0xf) <<  4)
+#define   TIMING_TRP(x)			(((x) & 0xf) <<  0)
+
+#define RESP				0x18
+
+#define TIMING_2			0x1c
+#define   TIMING_TADL(x)		((x) & 0xf)
+
+#define CMD_1				0x20
+#define CMD_2				0x24
+#define ADDR_1				0x28
+#define ADDR_2				0x2c
+
+#define DMA_CTRL			0x30
+#define   DMA_CTRL_GO			(1 << 31)
+#define   DMA_CTRL_IN			(0 << 30)
+#define   DMA_CTRL_OUT			(1 << 30)
+#define   DMA_CTRL_PERF_EN		(1 << 29)
+#define   DMA_CTRL_IE_DONE		(1 << 28)
+#define   DMA_CTRL_REUSE		(1 << 27)
+#define   DMA_CTRL_BURST_1		(2 << 24)
+#define   DMA_CTRL_BURST_4		(3 << 24)
+#define   DMA_CTRL_BURST_8		(4 << 24)
+#define   DMA_CTRL_BURST_16		(5 << 24)
+#define   DMA_CTRL_IS_DONE		(1 << 20)
+#define   DMA_CTRL_EN_A			(1 <<  2)
+#define   DMA_CTRL_EN_B			(1 <<  1)
+
+#define DMA_CFG_A			0x34
+#define DMA_CFG_B			0x38
+
+#define FIFO_CTRL			0x3c
+#define   FIFO_CTRL_CLR_ALL		(1 << 3)
+
+#define DATA_PTR			0x40
+#define TAG_PTR				0x44
+#define ECC_PTR				0x48
+
+#define HWSTATUS_CMD			0x50
+#define HWSTATUS_MASK			0x54
+#define   HWSTATUS_RDSTATUS_MASK(x)	(((x) & 0xff) << 24)
+#define   HWSTATUS_RDSTATUS_VALUE(x)	(((x) & 0xff) << 16)
+#define   HWSTATUS_RBSY_MASK(x)		(((x) & 0xff) << 8)
+#define   HWSTATUS_RBSY_VALUE(x)	(((x) & 0xff) << 0)
+
+#define DEC_RESULT			0xd0
+#define   DEC_RESULT_CORRFAIL		(1 << 8)
+
+#define DEC_STATUS_BUF			0xd4
+#define   DEC_STATUS_BUF_FAIL_SEC_FLAG(x)	((x) & (0xff << 24))
+#define   DEC_STATUS_BUF_CORR_SEC_FLAG(x)	((x) & (0xff << 16))
+#define   DEC_STATUS_BUF_MAX_CORR_CNT(x)	(((x) & 0xf00) >> 8)
+
+struct tegra_nand {	/* per-controller driver state, allocated in probe */
+	void __iomem *regs;	/* mapped controller registers, base for readl/writel */
+	int irq;	/* interrupt number obtained from platform_get_irq() */
+	struct clk *clk;	/* "nand" clock; its rate feeds the timing setup */
+	struct reset_control *rst;	/* "nand" reset line, pulsed in probe */
+	int wp_gpio;	/* GPIO read from "nvidia,wp-gpios", driven high in probe */
+	int buswidth;	/* value returned by of_get_nand_bus_width() */
+
+	struct nand_chip chip;	/* NAND core chip object; mtd.priv points at it */
+	struct mtd_info mtd;	/* embedded MTD device, see to_tegra_nand() */
+	struct device *dev;	/* the platform device's struct device, used for logging */
+
+	struct completion command_complete;	/* completed by the IRQ handler on ISR_CMD_DONE */
+	struct completion dma_complete;	/* completed by the IRQ handler on DMA_CTRL_IS_DONE */
+
+	dma_addr_t data_dma;	/* DMA address of data_buf */
+	void *data_buf;	/* coherent bounce buffer of mtd->writesize bytes */
+	dma_addr_t oob_dma;	/* DMA address of oob_buf */
+	void *oob_buf;	/* coherent bounce buffer of mtd->oobsize bytes */
+
+	int cur_chip;	/* chip number last given to select_chip, fed to CMD_CE() */
+};
+
+/* Map an MTD device back to the tegra_nand structure that embeds it. */
+static inline struct tegra_nand *to_tegra_nand(struct mtd_info *mtd)
+{
+	struct tegra_nand *nand = container_of(mtd, struct tegra_nand, mtd);
+
+	return nand;
+}
+
+/* OOB layout selected in probe for chips with 16 bytes of OOB per page. */
+static struct nand_ecclayout tegra_nand_oob_16 = {
+	.eccbytes = 4,
+	.eccpos = { 3, 4, 5, 6 },
+	.oobfree = {
+		{ .offset = 8, .length = 8 }
+	}
+};
+
+static struct nand_ecclayout tegra_nand_oob_64 = {	/* layout selected in probe for a 64-byte OOB */
+	.eccbytes = 36,	/* NOTE(review): eccpos below lists 37 positions (3..39) - confirm which is intended */
+	.eccpos = {
+		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+		35, 36, 37, 38, 39
+	},
+	.oobfree = {
+		{ .offset = 40, .length = 20 }	/* free bytes start right after the last listed ECC position */
+	}
+};
+
+static struct nand_ecclayout tegra_nand_oob_128 = {	/* layout selected in probe for a 128-byte OOB */
+	.eccbytes = 72,	/* NOTE(review): eccpos below lists 73 positions (3..75) - confirm which is intended */
+	.eccpos = {
+		 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+		19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+		35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+		51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
+		67, 68, 69, 70, 71, 72, 73, 74, 75
+	},
+	.oobfree = {
+		{ .offset = 76, .length = 52 }	/* free bytes start right after the last listed ECC position */
+	}
+};
+
+static struct nand_ecclayout tegra_nand_oob_224 = {	/* layout selected in probe for a 224-byte OOB */
+	.eccbytes = 144,	/* NOTE(review): eccpos below lists 145 positions (3..147) - confirm which is intended */
+	.eccpos = {
+		  3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+		 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
+		 27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
+		 39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,
+		 51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,
+		 63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+		 75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,
+		 87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,
+		 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+		111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
+		123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+		135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
+		147
+	},
+	.oobfree = {
+		{ .offset = 148, .length = 76 }	/* free bytes start right after the last listed ECC position */
+	}
+};
+
+/*
+ * Controller interrupt handler.
+ *
+ * Samples ISR and DMA_CTRL once, wakes the waiters for "command done" and
+ * "DMA done", logs FIFO under/overruns and finally writes the sampled ISR
+ * value back to acknowledge it.  Returns IRQ_NONE when neither register
+ * reports anything pending.
+ */
+static irqreturn_t tegra_nand_irq(int irq, void *data)
+{
+	struct tegra_nand *nand = data;
+	u32 status = readl(nand->regs + ISR);
+	u32 dma_ctrl = readl(nand->regs + DMA_CTRL);
+
+	/* nothing pending in either register: the interrupt is not ours */
+	if (!status && !(dma_ctrl & DMA_CTRL_IS_DONE))
+		return IRQ_NONE;
+
+	if (status & ISR_CMD_DONE)
+		complete(&nand->command_complete);
+
+	if (status & ISR_UND)
+		dev_dbg(nand->dev, "  FIFO underrun\n");
+
+	if (status & ISR_OVR)
+		dev_dbg(nand->dev, "  FIFO overrun\n");
+
+	if (dma_ctrl & DMA_CTRL_IS_DONE) {
+		/* write the sampled value back, then wake the DMA waiter */
+		writel(dma_ctrl, nand->regs + DMA_CTRL);
+		complete(&nand->dma_complete);
+	}
+
+	/* acknowledge exactly the status bits that were sampled above */
+	writel(status, nand->regs + ISR);
+
+	return IRQ_HANDLED;
+}
+
+/* Load the column/page address registers shared by READ0 and SEQIN. */
+static void tegra_nand_load_page_addr(struct tegra_nand *nand, int column,
+				      int page_addr)
+{
+	u32 addr;
+
+	addr = (page_addr << 16) | (column & 0xffff);
+	writel(addr, nand->regs + ADDR_1);
+
+	addr = page_addr >> 16;
+	writel(addr, nand->regs + ADDR_2);
+}
+
+/*
+ * cmdfunc hook: translate a NAND core command into controller register
+ * writes, start it and sleep until the "command done" interrupt arrives.
+ *
+ * NAND_CMD_ERASE2 is a no-op because ERASE1 already programs both command
+ * bytes.  Unknown commands are logged and ignored; neither case touches the
+ * CMD register or waits.
+ */
+static void tegra_nand_command(struct mtd_info *mtd, unsigned int command,
+			       int column, int page_addr)
+{
+	struct tegra_nand *nand = to_tegra_nand(mtd);
+	void __iomem *regs = nand->regs;
+	u32 ctrl;
+
+	switch (command) {
+	case NAND_CMD_READOOB:
+		/* address the OOB as the bytes following the page data */
+		column += mtd->writesize;
+		/* fall-through */
+
+	case NAND_CMD_READ0:
+		writel(NAND_CMD_READ0, regs + CMD_1);
+		writel(NAND_CMD_READSTART, regs + CMD_2);
+		tegra_nand_load_page_addr(nand, column, page_addr);
+
+		ctrl = CMD_GO | CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4) |
+		       CMD_SEC_CMD | CMD_RBSY_CHK;
+		break;
+
+	case NAND_CMD_SEQIN:
+		writel(NAND_CMD_SEQIN, regs + CMD_1);
+		tegra_nand_load_page_addr(nand, column, page_addr);
+
+		ctrl = CMD_GO | CMD_CLE | CMD_ALE | CMD_ALE_SIZE(4);
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		writel(NAND_CMD_PAGEPROG, regs + CMD_1);
+
+		ctrl = CMD_GO | CMD_CLE;
+		break;
+
+	case NAND_CMD_READID:
+		writel(NAND_CMD_READID, regs + CMD_1);
+		writel(column & 0xff, regs + ADDR_1);
+
+		ctrl = CMD_GO | CMD_CLE | CMD_ALE;
+		break;
+
+	case NAND_CMD_ERASE1:
+		writel(NAND_CMD_ERASE1, regs + CMD_1);
+		writel(NAND_CMD_ERASE2, regs + CMD_2);
+		writel(page_addr, regs + ADDR_1);
+
+		ctrl = CMD_GO | CMD_CLE | CMD_ALE | CMD_ALE_SIZE(2) |
+		       CMD_SEC_CMD | CMD_RBSY_CHK;
+		break;
+
+	case NAND_CMD_ERASE2:
+		/* already issued as the second command byte of ERASE1 */
+		return;
+
+	case NAND_CMD_STATUS:
+		writel(NAND_CMD_STATUS, regs + CMD_1);
+
+		ctrl = CMD_GO | CMD_CLE;
+		break;
+
+	case NAND_CMD_PARAM:
+		writel(NAND_CMD_PARAM, regs + CMD_1);
+		writel(column & 0xff, regs + ADDR_1);
+
+		ctrl = CMD_GO | CMD_CLE | CMD_ALE;
+		break;
+
+	case NAND_CMD_RESET:
+		writel(NAND_CMD_RESET, regs + CMD_1);
+
+		ctrl = CMD_GO | CMD_CLE;
+		break;
+
+	default:
+		dev_warn(nand->dev, "unsupported command: %x\n", command);
+		return;
+	}
+
+	/* every started command targets the currently selected chip */
+	ctrl |= CMD_CE(nand->cur_chip);
+	writel(ctrl, regs + CMD);
+
+	wait_for_completion(&nand->command_complete);
+}
+
+/*
+ * select_chip hook: only records the chip number; it is applied later via
+ * CMD_CE() whenever a command is started.
+ */
+static void tegra_nand_select_chip(struct mtd_info *mtd, int chip)
+{
+	to_tegra_nand(mtd)->cur_chip = chip;
+}
+
+/*
+ * read_byte hook: run a PIO read with a transfer size field of 0 on the
+ * selected chip and return the low byte of the RESP register.
+ */
+static uint8_t tegra_nand_read_byte(struct mtd_info *mtd)
+{
+	struct tegra_nand *nand = to_tegra_nand(mtd);
+	u32 cmd = CMD_GO | CMD_PIO | CMD_RX | CMD_A_VALID |
+		  CMD_CE(nand->cur_chip) | CMD_TRANS_SIZE(0);
+	u32 resp;
+
+	writel(cmd, nand->regs + CMD);
+	wait_for_completion(&nand->command_complete);
+
+	resp = readl(nand->regs + RESP);
+
+	return resp & 0xff;
+}
+
+/*
+ * read_buf hook: fetch @length bytes by PIO.  Each controller command moves
+ * up to four bytes through the RESP register; the transfer size field holds
+ * the byte count minus one.
+ */
+static void tegra_nand_read_buf(struct mtd_info *mtd, uint8_t *buffer,
+				int length)
+{
+	struct tegra_nand *nand = to_tegra_nand(mtd);
+	size_t done = 0;
+
+	while (done < length) {
+		size_t chunk = min_t(size_t, length - done, 4);
+		u32 word;
+
+		writel(CMD_GO | CMD_PIO | CMD_RX | CMD_A_VALID |
+		       CMD_CE(nand->cur_chip) | CMD_TRANS_SIZE(chunk - 1),
+		       nand->regs + CMD);
+		wait_for_completion(&nand->command_complete);
+
+		/* copy only the bytes that were actually requested */
+		word = readl(nand->regs + RESP);
+		memcpy(buffer + done, &word, chunk);
+
+		done += 4;
+	}
+}
+
+/*
+ * write_buf hook: send @length bytes by PIO.  Each controller command takes
+ * up to four bytes from the RESP register; the transfer size field holds
+ * the byte count minus one.
+ */
+static void tegra_nand_write_buf(struct mtd_info *mtd, const uint8_t *buffer,
+				 int length)
+{
+	struct tegra_nand *nand = to_tegra_nand(mtd);
+	size_t i;
+
+	for (i = 0; i < length; i += 4) {
+		/*
+		 * Start from zero: a tail shorter than four bytes only fills
+		 * part of the word, and the remainder must not be
+		 * uninitialized stack contents.
+		 */
+		u32 value = 0;
+		size_t n = min_t(size_t, length - i, 4);
+
+		memcpy(&value, buffer + i, n);
+		writel(value, nand->regs + RESP);
+
+		value = CMD_GO | CMD_PIO | CMD_TX | CMD_A_VALID |
+			CMD_CE(nand->cur_chip) | CMD_TRANS_SIZE(n - 1);
+
+		writel(value, nand->regs + CMD);
+		wait_for_completion(&nand->command_complete);
+	}
+}
+
+/*
+ * ecc.read_page hook: DMA one page (and, if @oob_required, its OOB) into
+ * the coherent bounce buffers and copy the result to the caller.
+ *
+ * The read command and page address are not issued here; @page is unused.
+ *
+ * Returns the maximum number of bitflips corrected in any sector, or 0.
+ * An uncorrectable sector is reported by incrementing
+ * mtd->ecc_stats.failed, as the NAND core expects, so that the read ends
+ * in -EBADMSG instead of an unrelated negative error code.
+ */
+static int tegra_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
+{
+	struct tegra_nand *nand = to_tegra_nand(mtd);
+	unsigned int bitflips;
+	u32 value;
+
+	writel(mtd->writesize - 1, nand->regs + DMA_CFG_A);
+	writel(nand->data_dma, nand->regs + DATA_PTR);
+
+	if (oob_required) {
+		writel(mtd->oobsize - 1, nand->regs + DMA_CFG_B);
+		writel(nand->oob_dma, nand->regs + TAG_PTR);
+	} else {
+		writel(0, nand->regs + DMA_CFG_B);
+		writel(0, nand->regs + TAG_PTR);
+	}
+
+	value = DMA_CTRL_GO | DMA_CTRL_IN | DMA_CTRL_PERF_EN |
+		DMA_CTRL_REUSE | DMA_CTRL_IE_DONE | DMA_CTRL_IS_DONE |
+		DMA_CTRL_BURST_8 | DMA_CTRL_EN_A;
+
+	if (oob_required)
+		value |= DMA_CTRL_EN_B;
+
+	writel(value, nand->regs + DMA_CTRL);
+
+	value = CMD_GO | CMD_RX | CMD_TRANS_SIZE(8) |
+		CMD_A_VALID | CMD_CE(nand->cur_chip);
+	if (oob_required)
+		value |= CMD_B_VALID;
+	writel(value, nand->regs + CMD);
+
+	/* both the command and the DMA transfer must have finished */
+	wait_for_completion(&nand->command_complete);
+	wait_for_completion(&nand->dma_complete);
+
+	if (oob_required)
+		memcpy(chip->oob_poi, nand->oob_buf, mtd->oobsize);
+	memcpy(buf, nand->data_buf, mtd->writesize);
+
+	value = readl(nand->regs + DEC_RESULT);
+	if (value & DEC_RESULT_CORRFAIL) {
+		value = readl(nand->regs + DEC_STATUS_BUF);
+
+		if (DEC_STATUS_BUF_FAIL_SEC_FLAG(value)) {
+			/* uncorrectable: account for it, data is still returned */
+			mtd->ecc_stats.failed++;
+			return 0;
+		}
+
+		if (DEC_STATUS_BUF_CORR_SEC_FLAG(value)) {
+			bitflips = DEC_STATUS_BUF_MAX_CORR_CNT(value);
+			mtd->ecc_stats.corrected += bitflips;
+			return bitflips;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * ecc.write_page hook: copy the page into the coherent bounce buffer and
+ * DMA it to the chip.  Only DMA channel A (page data) is used; channel B is
+ * programmed with zeroes.  Always returns 0.
+ *
+ * NOTE(review): @oob_required and chip->oob_poi are not used here - confirm
+ * that caller-supplied OOB data is meant to be dropped on this path.
+ */
+static int tegra_nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+				 const uint8_t *buf, int oob_required)
+{
+	struct tegra_nand *nand = to_tegra_nand(mtd);
+	void __iomem *regs = nand->regs;
+	u32 dma_ctrl, cmd;
+
+	memcpy(nand->data_buf, buf, mtd->writesize);
+
+	/* channel A carries the page data */
+	writel(mtd->writesize - 1, regs + DMA_CFG_A);
+	writel(nand->data_dma, regs + DATA_PTR);
+
+	/* channel B is left unused */
+	writel(0, regs + DMA_CFG_B);
+	writel(0, regs + TAG_PTR);
+
+	dma_ctrl = DMA_CTRL_GO | DMA_CTRL_OUT | DMA_CTRL_PERF_EN |
+		   DMA_CTRL_IE_DONE | DMA_CTRL_IS_DONE |
+		   DMA_CTRL_BURST_8 | DMA_CTRL_EN_A;
+	writel(dma_ctrl, regs + DMA_CTRL);
+
+	cmd = CMD_GO | CMD_TX | CMD_A_VALID | CMD_TRANS_SIZE(8) |
+	      CMD_CE(nand->cur_chip);
+	writel(cmd, regs + CMD);
+
+	/* both the command and the DMA transfer must have finished */
+	wait_for_completion(&nand->command_complete);
+	wait_for_completion(&nand->dma_complete);
+
+	return 0;
+}
+
+/*
+ * Program TIMING_1/TIMING_2 for ONFI asynchronous timing mode @mode.
+ *
+ * The SDR timing values are in picoseconds; "period" is the length of one
+ * controller clock cycle in picoseconds, derived from the clock rate in
+ * MHz.  If the clock rate is unusable or @mode is not a known timing mode,
+ * a warning is logged and the timing registers are left untouched.
+ */
+static void tegra_nand_setup_timing(struct tegra_nand *nand, int mode)
+{
+	unsigned long rate = clk_get_rate(nand->clk) / 1000000;
+	const struct nand_sdr_timings *timings;
+	unsigned long period;
+	u32 val, reg = 0;
+
+	/* a rate below 1 MHz would make the division below trap */
+	if (!rate) {
+		dev_warn(nand->dev, "clock rate too low to derive timings\n");
+		return;
+	}
+
+	period = 1000000 / rate;
+	if (!period) {
+		dev_warn(nand->dev, "clock rate too high to derive timings\n");
+		return;
+	}
+
+	/* an out-of-range mode yields an ERR_PTR, not a timing table */
+	timings = onfi_async_timing_mode_to_sdr_timings(mode);
+	if (IS_ERR(timings)) {
+		dev_warn(nand->dev, "invalid timing mode %d\n", mode);
+		return;
+	}
+
+	val = max3(timings->tAR_min, timings->tRR_min,
+		   timings->tRC_min) / period;
+	if (val > 2)
+		val -= 2;
+	reg |= TIMING_TCR_TAR_TRR(val);
+
+	val = max(max(timings->tCS_min, timings->tCH_min),
+		  max(timings->tALS_min, timings->tALH_min)) / period;
+	if (val > 1)
+		val -= 1;
+	reg |= TIMING_TCS(val);
+
+	/*
+	 * NOTE(review): the fields below that divide by 1000 end up in
+	 * nanoseconds while the others are in clock cycles - confirm the
+	 * unit these register fields expect.
+	 */
+	val = max(timings->tRP_min, timings->tREA_max) + 6000;
+	reg |= TIMING_TRP(val / 1000);
+	reg |= TIMING_TRP_RESP(val / period);
+
+	reg |= TIMING_TWB(timings->tWB_max / period);
+	reg |= TIMING_TWHR(timings->tWHR_min / period);
+	reg |= TIMING_TWH(timings->tWH_min / 1000);
+	reg |= TIMING_TWP(timings->tWP_min / 1000);
+	reg |= TIMING_TRH(timings->tRHW_min / 1000);
+
+	writel(reg, nand->regs + TIMING_1);
+
+	val = timings->tADL_min / period;
+	if (val > 2)
+		val -= 2;
+	reg = TIMING_TADL(val);
+
+	writel(reg, nand->regs + TIMING_2);
+}
+
+/*
+ * Pick the fastest asynchronous timing mode the detected chip supports and
+ * program the controller for it.
+ *
+ * onfi_get_async_timing_mode() returns a bitmask with bit N set for each
+ * supported mode N.  fls() is 1-based, so the highest supported mode is
+ * fls(mask) - 1; an empty mask falls back to mode 0.  Chips without ONFI
+ * timing information use the chip's default mode.
+ */
+static void tegra_nand_setup_chiptiming(struct tegra_nand *nand)
+{
+	struct nand_chip *chip = &nand->chip;
+	int mode;
+
+	mode = onfi_get_async_timing_mode(chip);
+	if (mode == ONFI_TIMING_MODE_UNKNOWN) {
+		mode = chip->onfi_timing_mode_default;
+	} else {
+		mode = fls(mode) - 1;
+		if (mode < 0)
+			mode = 0;
+	}
+
+	tegra_nand_setup_timing(nand, mode);
+}
+
+/*
+ * Read the optional device tree properties.
+ *
+ * nand->wp_gpio is set to the GPIO named by "nvidia,wp-gpios", or left at
+ * the negative lookup result when the property is absent.  A negative value
+ * fails gpio_is_valid(), whereas 0 is a real GPIO number and must not be
+ * used as the "none" marker.
+ *
+ * Returns 0 on success, -EPROBE_DEFER if the GPIO provider is not ready
+ * yet, or the negative error from of_get_nand_bus_width().
+ */
+static int tegra_nand_parse_dt(struct device_node *node,
+			       struct tegra_nand *nand)
+{
+	enum of_gpio_flags flags;
+
+	nand->wp_gpio = of_get_named_gpio_flags(node, "nvidia,wp-gpios", 0,
+						 &flags);
+	if (nand->wp_gpio == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
+	nand->buswidth = of_get_nand_bus_width(node);
+	if (nand->buswidth < 0)
+		return nand->buswidth;
+
+	return 0;
+}
+
+/*
+ * Bind the driver to a controller instance.
+ *
+ * Acquires the register window, interrupt, reset and clock, brings the
+ * controller out of reset, identifies the attached chip, configures
+ * hardware ECC and page size to match it and registers the MTD device.
+ *
+ * The interrupt is requested only after the completions exist, the clock
+ * is running and stale interrupt state has been cleared, so the handler
+ * never sees a half-initialized controller.  Every failure after the clock
+ * has been enabled masks the controller interrupts and disables the clock
+ * again.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int tegra_nand_probe(struct platform_device *pdev)
+{
+	struct mtd_part_parser_data ppdata = {
+		.of_node = pdev->dev.of_node,
+	};
+	struct tegra_nand *nand;
+	struct nand_chip *chip;
+	struct mtd_info *mtd;
+	struct resource *res;
+	u32 value;
+	int err;
+
+	nand = devm_kzalloc(&pdev->dev, sizeof(*nand), GFP_KERNEL);
+	if (!nand)
+		return -ENOMEM;
+
+	nand->dev = &pdev->dev;
+	init_completion(&nand->command_complete);
+	init_completion(&nand->dma_complete);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nand->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(nand->regs))
+		return PTR_ERR(nand->regs);
+
+	nand->irq = platform_get_irq(pdev, 0);
+	if (nand->irq < 0)
+		return nand->irq;
+
+	nand->rst = devm_reset_control_get(&pdev->dev, "nand");
+	if (IS_ERR(nand->rst))
+		return PTR_ERR(nand->rst);
+
+	nand->clk = devm_clk_get(&pdev->dev, "nand");
+	if (IS_ERR(nand->clk))
+		return PTR_ERR(nand->clk);
+
+	err = tegra_nand_parse_dt(pdev->dev.of_node, nand);
+	if (err)
+		return err;
+
+	err = clk_prepare_enable(nand->clk);
+	if (err)
+		return err;
+
+	reset_control_assert(nand->rst);
+	udelay(2);
+	reset_control_deassert(nand->rst);
+
+	if (gpio_is_valid(nand->wp_gpio)) {
+		err = devm_gpio_request_one(&pdev->dev, nand->wp_gpio,
+				GPIOF_OUT_INIT_HIGH, "tegra-nand-wp");
+		if (err)
+			goto err_disable_clk;
+	}
+
+	value = HWSTATUS_RDSTATUS_MASK(1) | HWSTATUS_RDSTATUS_VALUE(0) |
+		HWSTATUS_RBSY_MASK(NAND_STATUS_READY) |
+		HWSTATUS_RBSY_VALUE(NAND_STATUS_READY);
+	writel(NAND_CMD_STATUS, nand->regs + HWSTATUS_CMD);
+	writel(value, nand->regs + HWSTATUS_MASK);
+
+	mtd = &nand->mtd;
+	mtd->name = dev_name(&pdev->dev);
+	mtd->owner = THIS_MODULE;
+	mtd->priv = &nand->chip;
+
+	mtd->type = MTD_NANDFLASH;
+	mtd->flags = MTD_CAP_NANDFLASH;
+
+	/* clear interrupts */
+	value = readl(nand->regs + ISR);
+	writel(value, nand->regs + ISR);
+
+	writel(DMA_CTRL_IS_DONE, nand->regs + DMA_CTRL);
+
+	/* the handler touches registers, so install it only now */
+	err = devm_request_irq(&pdev->dev, nand->irq, tegra_nand_irq, 0,
+			       dev_name(&pdev->dev), nand);
+	if (err)
+		goto err_disable_clk;
+
+	/* enable interrupts */
+	value = IER_UND | IER_OVR | IER_CMD_DONE | IER_ECC_ERR | IER_GIE;
+	writel(value, nand->regs + IER);
+
+	chip = &nand->chip;
+	chip->cmdfunc = tegra_nand_command;
+	chip->select_chip = tegra_nand_select_chip;
+	chip->read_byte = tegra_nand_read_byte;
+	chip->read_buf = tegra_nand_read_buf;
+	chip->write_buf = tegra_nand_write_buf;
+
+	/* identify the chip with the slowest timing mode */
+	tegra_nand_setup_timing(nand, 0);
+
+	err = nand_scan_ident(mtd, 1, NULL);
+	if (err)
+		goto err_disable_clk;
+
+	/* bounce buffers sized for the chip that was just detected */
+	nand->data_buf = dmam_alloc_coherent(&pdev->dev, mtd->writesize,
+					    &nand->data_dma, GFP_KERNEL);
+	if (!nand->data_buf) {
+		err = -ENOMEM;
+		goto err_disable_clk;
+	}
+
+	nand->oob_buf = dmam_alloc_coherent(&pdev->dev, mtd->oobsize,
+					    &nand->oob_dma, GFP_KERNEL);
+	if (!nand->oob_buf) {
+		err = -ENOMEM;
+		goto err_disable_clk;
+	}
+
+	chip->ecc.mode = NAND_ECC_HW;
+	chip->ecc.size = 512;
+	chip->ecc.bytes = mtd->oobsize;
+	chip->ecc.read_page = tegra_nand_read_page;
+	chip->ecc.write_page = tegra_nand_write_page;
+
+	value = CFG_HW_ECC | CFG_ECC_SEL | CFG_ERR_COR | CFG_PIPE_EN |
+		CFG_TVAL_8 | CFG_SKIP_SPARE | CFG_SKIP_SPARE_SIZE_4;
+
+	switch (mtd->oobsize) {
+	case 16:
+		chip->ecc.layout = &tegra_nand_oob_16;
+		chip->ecc.strength = 1;
+		value |= CFG_TAG_BYTE_SIZE(4);
+		break;
+	case 64:
+		chip->ecc.layout = &tegra_nand_oob_64;
+		chip->ecc.strength = 8;
+		value |= CFG_TAG_BYTE_SIZE(36);
+		break;
+	case 128:
+		chip->ecc.layout = &tegra_nand_oob_128;
+		chip->ecc.strength = 8;
+		value |= CFG_TAG_BYTE_SIZE(72);
+		break;
+	case 224:
+		chip->ecc.layout = &tegra_nand_oob_224;
+		chip->ecc.strength = 8;
+		value |= CFG_TAG_BYTE_SIZE(144);
+		break;
+	default:
+		dev_err(&pdev->dev, "unhandled OOB size %d\n", mtd->oobsize);
+		err = -ENODEV;
+		goto err_disable_clk;
+	}
+
+	switch (mtd->writesize) {
+	case 256:
+		value |= CFG_PS_256;
+		break;
+	case 512:
+		value |= CFG_PS_512;
+		break;
+	case 1024:
+		value |= CFG_PS_1024;
+		break;
+	case 2048:
+		value |= CFG_PS_2048;
+		break;
+	case 4096:
+		value |= CFG_PS_4096;
+		break;
+	default:
+		dev_err(&pdev->dev, "unhandled writesize %d\n", mtd->writesize);
+		err = -ENODEV;
+		goto err_disable_clk;
+	}
+
+	if (nand->buswidth == 16)
+		value |= CFG_BUS_WIDTH_16;
+
+	writel(value, nand->regs + CFG);
+
+	tegra_nand_setup_chiptiming(nand);
+
+	err = nand_scan_tail(mtd);
+	if (err)
+		goto err_disable_clk;
+
+	/* remove() may run as soon as the device is registered */
+	platform_set_drvdata(pdev, nand);
+
+	err = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
+	if (err) {
+		nand_release(mtd);
+		goto err_disable_clk;
+	}
+
+	return 0;
+
+err_disable_clk:
+	/* mask controller interrupts before the clock goes away */
+	writel(0, nand->regs + IER);
+	clk_disable_unprepare(nand->clk);
+
+	return err;
+}
+
+/*
+ * Unbind: unregister the NAND/MTD device, then switch the controller clock
+ * off.  Always returns 0.
+ */
+static int tegra_nand_remove(struct platform_device *pdev)
+{
+	struct tegra_nand *nand = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = &nand->mtd;
+
+	nand_release(mtd);
+	clk_disable_unprepare(nand->clk);
+
+	return 0;
+}
+
+/* Device tree compatibles handled by this driver. */
+static const struct of_device_id tegra_nand_of_match[] = {
+	{ .compatible = "nvidia,tegra20-nand" },
+	{ .compatible = "nvidia,tegra30-nand" },
+	{ }
+};
+
+/* Platform driver glue; registered and unregistered with the module. */
+static struct platform_driver tegra_nand_driver = {
+	.driver = {
+		.name = "tegra-nand",
+		.of_match_table = tegra_nand_of_match,
+	},
+	.probe = tegra_nand_probe,
+	.remove = tegra_nand_remove,
+};
+module_platform_driver(tegra_nand_driver);
+
+MODULE_DESCRIPTION("NVIDIA Tegra NAND driver");
+MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
+MODULE_AUTHOR("Lucas Stach <dev@lynxeye.de>");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, tegra_nand_of_match);