diff mbox

[U-Boot,v2,3/4] mtd: nand: add Freescale NFC driver

Message ID 70dc426d4c612b10f760337ffdb5f79aff173a8f.1408031213.git.stefan@agner.ch
State Changes Requested
Delegated to: Stefano Babic
Headers show

Commit Message

Stefan Agner Aug. 14, 2014, 4:30 p.m. UTC
This adds initial support for Freescale NFC (NAND Flash Controller)
found in ARM Vybrid SoC's, Power Architecture MPC5125 and others.
However, this driver is only tested on Vybrid.

Signed-off-by: Stefan Agner <stefan@agner.ch>
---
 drivers/mtd/nand/Makefile    |   1 +
 drivers/mtd/nand/vf610_nfc.c | 706 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 707 insertions(+)
 create mode 100644 drivers/mtd/nand/vf610_nfc.c

Comments

Bill Pringlemeir Aug. 14, 2014, 6:34 p.m. UTC | #1
On 14 Aug 2014, stefan@agner.ch wrote:

> This adds initial support for Freescale NFC (NAND Flash Controller)
> found in ARM Vybrid SoC's, Power Architecture MPC5125 and others.
> However, this driver is only tested on Vybrid.
> 
> Signed-off-by: Stefan Agner <stefan@agner.ch>
> ---
>  drivers/mtd/nand/Makefile    |   1 +
>  drivers/mtd/nand/vf610_nfc.c | 706
> ++++++++++++++++++++++++++++++++++++++++++> +
>  2 files changed, 707 insertions(+)
>  create mode 100644 drivers/mtd/nand/vf610_nfc.c
> 
> diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
> new file mode 100644
> index 0000000..3150ac1
> --- /dev/null
> +++ b/drivers/mtd/nand/vf610_nfc.c
> @@ -0,0 +1,706 @@
> +/*
> + * Copyright 2009-2014 Freescale Semiconductor, Inc. and others
> + *

[snip]

> +/* Count the number of 0's in buff upto max_bits */
> +static inline int count_written_bits(uint8_t *buff, int size, int max_bits)
> +{
> +	uint32_t *buff32 = (uint32_t *)buff;
> +	int k, written_bits = 0;
> +
> +	for (k = 0; k < (size / 4); k++) {
> +		written_bits += hweight32(~buff32[k]);
> +		if (written_bits > max_bits)
> +			break;
> +	}
> +
> +	return written_bits;
> +}
 
That is a nice change. 
 
> +static inline int vf610_nfc_correct_data(struct mtd_info *mtd, u_char *dat)
> +{
> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
> +	u8 ecc_status;
> +	u8 ecc_count;
> +	int flip;
> +
> +	ecc_status = __raw_readb(nfc->regs + ECC_SRAM_ADDR * 8 + ECC_OFFSET);
> +	ecc_count = ecc_status & ECC_ERR_COUNT;
> +	if (!(ecc_status & ECC_STATUS_MASK))
> +		return ecc_count;
> +
> +	/* If 'ecc_count' zero or less then buffer is all 0xff or erased. */
> +	flip = count_written_bits(dat, nfc->chip.ecc.size, ecc_count);
> +
> +	/* ECC failed. */
> +	if (flip > ecc_count)
> +		return -1;

Sorry, I missed this in version one of the patch.  The original had,

<       if (flip > ecc_count) {
<               nfc->page = -1;
---
>       if (flip > ecc_count)
522d508
<       }

I can see why you removed this (nfc->page = -1).  However, I think that
higher layers may want to re-read on an error in case of unstable bits?
It is very little code to ensure a re-read in case of ECC failure.  The
2nd physical read may pass whereas the first failed.  This path is rare,
but maybe important?  A higher layer may migrate the data in this case;
just as with corrected bits.  But maybe U-Boot will never do this?

> +
> +	/* Erased page. */
> +	memset(dat, 0xff, nfc->chip.ecc.size);
> +	return 0;
> +}

Regards,
Bill Pringlemeir.
Scott Wood Aug. 14, 2014, 7:49 p.m. UTC | #2
On Thu, 2014-08-14 at 18:30 +0200, Stefan Agner wrote:
> +#define	DRV_NAME		"fsl_nfc"

DRV_NAME doesn't match filename (neither does the patch title), and it
doesn't seem all that useful anyway -- the one place that uses it would
be better off using __func__.

> +static int vf610_nfc_nand_init(int devnum, u8 *addr)

Why u8?  Either use void or u32.  Also should have __iomem.

...OK, I see you copied that from the examples I pointed out.  I have no
idea why those use u8 either. :-(

> +	chip->IO_ADDR_R = chip->IO_ADDR_W = nfc->regs = (void __iomem *)addr;

Don't set IO_ADDR_R/IO_ADDR_W if they're not going to be used.

-Scott
Bill Pringlemeir Aug. 14, 2014, 9:12 p.m. UTC | #3
> On 14 Aug 2014, stefan@agner.ch wrote:
> 
> This adds initial support for Freescale NFC (NAND Flash Controller)
> found in ARM Vybrid SoC's, Power Architecture MPC5125 and others.
> However, this driver is only tested on Vybrid.

This is only to expand on the nand controller register and SRAM use.

[snip]
 
> diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
> new file mode 100644
> index 0000000..3150ac1
> --- /dev/null
> +++ b/drivers/mtd/nand/vf610_nfc.c

[snip]

> +static inline u32 vf610_nfc_read(struct mtd_info *mtd, uint reg)
> +{
> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
> +
> +	return readl(nfc->regs + reg);
> +}
> +
> +static inline void vf610_nfc_write(struct mtd_info *mtd, uint reg, u32 val)
> +{
> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
> +
> +	writel(val, nfc->regs + reg);
> +}

Ok, we always use readl/writel.  This is fine, but a little slower and
bigger.  I may try a register cache if I resubmit to the Linux MTD as
per Scott's suggestion.  Especially, this version is good for an
incremental patch.

I think these are in 'arch/arm/include/asm/io.h' of U-Boot.

#define dmb()           __asm__ __volatile__ ("" : : : "memory")
#define __iormb()       dmb()
#define __iowmb()       dmb()

#define readl(c)        ({ u32 __v = __arch_getl(c); __iormb(); __v; })
#define writel(v,c)     ({ u32 __v = v; __iowmb(); __arch_putl(__v,c); __v; })

Currently, these look like compiler barriers to me.  Fine so far.

> +
> +static inline void vf610_nfc_set(struct mtd_info *mtd, uint reg, u32 bits)
> +{
> +	vf610_nfc_write(mtd, reg, vf610_nfc_read(mtd, reg) | bits);
> +}
> +
> +static inline void vf610_nfc_clear(struct mtd_info *mtd, uint reg, u32 bits)
> +{
> +	vf610_nfc_write(mtd, reg, vf610_nfc_read(mtd, reg) & ~bits);
> +}
> +
> +static inline void vf610_nfc_set_field(struct mtd_info *mtd, u32 reg,
> +				       u32 mask, u32 shift, u32 val)
> +{
> +	vf610_nfc_write(mtd, reg,
> +			(vf610_nfc_read(mtd, reg) & (~mask)) | val << shift);
> +}
> +
> +/* Clear flags for upcoming command */
> +static inline void vf610_nfc_clear_status(struct mtd_info *mtd)
> +{
> +	u32 tmp = vf610_nfc_read(mtd, NFC_IRQ_STATUS);
> +	tmp |= CMD_DONE_CLEAR_BIT | IDLE_CLEAR_BIT;
> +	vf610_nfc_write(mtd, NFC_IRQ_STATUS, tmp);
> +}
> +
> +/* Wait for complete operation */
> +static inline void vf610_nfc_done(struct mtd_info *mtd)
> +{
> +	uint start;
> +
> +	vf610_nfc_set(mtd, NFC_FLASH_CMD2, START_BIT);
> +	barrier();

This barrier() is not needed then.  The  vf610_nfc_set() should have
done it twice already, plus everything is volatile.

[snip]

> +static inline void vf610_nfc_read_spare(struct mtd_info *mtd, void *buf,
> +					int len)
> +{
> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
> +
> +	len = min(mtd->oobsize, (uint)len);
> +	if (len > 0)
> +		memcpy(buf, nfc->regs + mtd->writesize, len);

Notice the 'memcpy(.. nfc->regs);'...

> +}
> +
> +/* Read data from NFC buffers */
> +static void vf610_nfc_read_buf(struct mtd_info *mtd, u_char *buf, int len)
> +{
> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
> +	uint c = nfc->column;
> +	uint l;
> +
> +	/* Handle main area */
> +	if (!nfc->spareonly) {
> +
> +		l = min((uint)len, mtd->writesize - c);
> +		nfc->column += l;
> +
> +		if (!nfc->alt_buf)
> +			memcpy(buf, nfc->regs + NFC_MAIN_AREA(0) + c, l);

Another 'memcpy(.. nfc->regs);'...

> +		else
> +			if (nfc->alt_buf & ALT_BUF_ID)
> +				*buf = vf610_nfc_get_id(mtd, c);
> +			else
> +				*buf = vf610_nfc_get_status(mtd);
> +
> +		buf += l;
> +		len -= l;
> +	}
> +
> +	/* Handle spare area access */
> +	if (len) {
> +		nfc->column += len;
> +		vf610_nfc_read_spare(mtd, buf, len);
> +	}
> +}
> +
> +/* Write data to NFC buffers */
> +static void vf610_nfc_write_buf(struct mtd_info *mtd, const u_char *buf,
> +				int len)
> +{
> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
> +	uint c = nfc->column;
> +	uint l;
> +
> +	l = min((uint)len, mtd->writesize + mtd->oobsize - c);
> +	nfc->column += l;
> +	memcpy(nfc->regs + NFC_MAIN_AREA(0) + c, buf, l);

Another 'memcpy(.. nfc->regs);'...

[snip]

These memcpy's are the same 'bus' interface as the registers.  We should
be just as worried about this SRAM buffer memory as the memory mapped
registers, shouldn't we?  Is a barrier() before reading and a barrier()
after writing fine for U-Boot?  Personally, I think they are safe as
only the 'vf610_nfc_set(mtd, NFC_FLASH_CMD2, START_BIT)' needs some
care.  Maybe a comment is fine?  It seems the Vybrid is safe for
different access sizes, but it is possible that some other CPU might not
be able to access this memory via 32/16/8 bit accesses and 'memcpy()'
may not be appropriate.  It seems that 'natural' size of the NFC
controller itself is 32bits and the CPU interface does lane masking.
Ie, boot mode documentation talks about remapping
'sram_physical_addr[13:3] = {cpu_addr[11:3],cpu_addr[13:12]}' saying
that bits 2,1 are not used (hopefully one based numbers).  This is just
my guess...

The VF6xx page has a documentation tab,
 http://www.freescale.com/webapp/sps/site/prod_summary.jsp?code=VF6xx

There is an app note, AN4947 'Understanding Vybrid Architecture', which
describes some timing details for the AHB bus (where this flash
controller is connected).  Pg21 Table 7 of that document gives some
measurements.  The QSPI is a similar peripheral on the AHB.  The first
and second lines give accesses of 4408 and subsequent accesses are 2770
Cortex-A5 clocks.  Normal SDRAM is 258 and 8 clocks.  Ie, it is quite
important in places to minimize accesses and try to make them
sequential.

However, it looks like most U-Boot NAND drivers use the memcpy()?   With
the exceptions of fsl_elbc_nand.c, fsl_ifc_nand.c, and mpc5121_nfc.c.
Maybe it doesn't matter...

Fwiw,
Bill Pringlemeir.
Stefan Agner Aug. 18, 2014, 9:41 a.m. UTC | #4
Am 2014-08-14 23:12, schrieb Bill Pringlemeir:
>> On 14 Aug 2014, stefan@agner.ch wrote:
>>
>> This adds initial support for Freescale NFC (NAND Flash Controller)
>> found in ARM Vybrid SoC's, Power Architecture MPC5125 and others.
>> However, this driver is only tested on Vybrid.
> 
> This is only to expand on the nand controller register and SRAM use.
> 
> [snip]
>  
>> diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
>> new file mode 100644
>> index 0000000..3150ac1
>> --- /dev/null
>> +++ b/drivers/mtd/nand/vf610_nfc.c
> 
> [snip]
> 
>> +static inline u32 vf610_nfc_read(struct mtd_info *mtd, uint reg)
>> +{
>> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
>> +
>> +	return readl(nfc->regs + reg);
>> +}
>> +
>> +static inline void vf610_nfc_write(struct mtd_info *mtd, uint reg, u32 val)
>> +{
>> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
>> +
>> +	writel(val, nfc->regs + reg);
>> +}
> 
> Ok, we always use readl/writel.  This is fine, but a little slower and
> bigger.  I may try a register cache if I resubmit to the Linux MTD as
> per Scott's suggestion.  Especially, this version is good for an
> incremental patch.

I measured the difference and it is about 1 MiB/s:
Full pages, readl/writel:
NAND read: device 0 offset 0x200000, size 0x800000
 8388608 bytes read in 772 ms (10.4 MiB/s): OK

Full pages, __raw_readl/__raw_writel
NAND read: device 0 offset 0x200000, size 0x800000
 8388608 bytes read in 696 ms (11.5 MiB/s): OK


Ok, this is actually quite a lot. Especially since I already optimized
the C code (by not using the helper functions like nfc_set/nfc_clear in
vf610_nfc_send_command), one would think there is now almost no
optimization potential. I looked into the disassembled code and could
narrow down the issue. Due to the memory barriers, all offsets were
calculated on each register access (nfc base to reg base, and add reg
offset), multiple instances of:
  20:	e59cc120 	ldr	ip, [ip, #288]	; 0x120
  24:	e59cc134 	ldr	ip, [ip, #308]	; 0x134

I optimized the code again: I now calculate the offsets manually and use
__raw_readl/__raw_writel rather than vf610_nfc_read/write in the
vf610_nfc_send_command(s) function, and I get the full speed again:

NAND read: device 0 offset 0x200000, size 0x800000
 8388608 bytes read in 687 ms (11.6 MiB/s): OK


> 
> I think these are in 'arch/arm/include/asm/io.h' of U-Boot.
> 
> #define dmb()           __asm__ __volatile__ ("" : : : "memory")
> #define __iormb()       dmb()
> #define __iowmb()       dmb()
> 
> #define readl(c)        ({ u32 __v = __arch_getl(c); __iormb(); __v; })
> #define writel(v,c)     ({ u32 __v = v; __iowmb(); __arch_putl(__v,c); __v; })
> 
> Currently, these look like compiler barriers to me.  Fine so far.
> 
>> +
>> +static inline void vf610_nfc_set(struct mtd_info *mtd, uint reg, u32 bits)
>> +{
>> +	vf610_nfc_write(mtd, reg, vf610_nfc_read(mtd, reg) | bits);
>> +}
>> +
>> +static inline void vf610_nfc_clear(struct mtd_info *mtd, uint reg, u32 bits)
>> +{
>> +	vf610_nfc_write(mtd, reg, vf610_nfc_read(mtd, reg) & ~bits);
>> +}
>> +
>> +static inline void vf610_nfc_set_field(struct mtd_info *mtd, u32 reg,
>> +				       u32 mask, u32 shift, u32 val)
>> +{
>> +	vf610_nfc_write(mtd, reg,
>> +			(vf610_nfc_read(mtd, reg) & (~mask)) | val << shift);
>> +}
>> +
>> +/* Clear flags for upcoming command */
>> +static inline void vf610_nfc_clear_status(struct mtd_info *mtd)
>> +{
>> +	u32 tmp = vf610_nfc_read(mtd, NFC_IRQ_STATUS);
>> +	tmp |= CMD_DONE_CLEAR_BIT | IDLE_CLEAR_BIT;
>> +	vf610_nfc_write(mtd, NFC_IRQ_STATUS, tmp);
>> +}
>> +
>> +/* Wait for complete operation */
>> +static inline void vf610_nfc_done(struct mtd_info *mtd)
>> +{
>> +	uint start;
>> +
>> +	vf610_nfc_set(mtd, NFC_FLASH_CMD2, START_BIT);
>> +	barrier();
> 
> This barrier() is not needed then.  The  vf610_nfc_set() should have
> done it twice already, plus everything is volatile.
> 

Agreed, this is not needed any more.

> [snip]
> 
>> +static inline void vf610_nfc_read_spare(struct mtd_info *mtd, void *buf,
>> +					int len)
>> +{
>> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
>> +
>> +	len = min(mtd->oobsize, (uint)len);
>> +	if (len > 0)
>> +		memcpy(buf, nfc->regs + mtd->writesize, len);
> 
> Notice the 'memcpy(.. nfc->regs);'...
> 
>> +}
>> +
>> +/* Read data from NFC buffers */
>> +static void vf610_nfc_read_buf(struct mtd_info *mtd, u_char *buf, int len)
>> +{
>> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
>> +	uint c = nfc->column;
>> +	uint l;
>> +
>> +	/* Handle main area */
>> +	if (!nfc->spareonly) {
>> +
>> +		l = min((uint)len, mtd->writesize - c);
>> +		nfc->column += l;
>> +
>> +		if (!nfc->alt_buf)
>> +			memcpy(buf, nfc->regs + NFC_MAIN_AREA(0) + c, l);
> 
> Another 'memcpy(.. nfc->regs);'...
> 
>> +		else
>> +			if (nfc->alt_buf & ALT_BUF_ID)
>> +				*buf = vf610_nfc_get_id(mtd, c);
>> +			else
>> +				*buf = vf610_nfc_get_status(mtd);
>> +
>> +		buf += l;
>> +		len -= l;
>> +	}
>> +
>> +	/* Handle spare area access */
>> +	if (len) {
>> +		nfc->column += len;
>> +		vf610_nfc_read_spare(mtd, buf, len);
>> +	}
>> +}
>> +
>> +/* Write data to NFC buffers */
>> +static void vf610_nfc_write_buf(struct mtd_info *mtd, const u_char *buf,
>> +				int len)
>> +{
>> +	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
>> +	uint c = nfc->column;
>> +	uint l;
>> +
>> +	l = min((uint)len, mtd->writesize + mtd->oobsize - c);
>> +	nfc->column += l;
>> +	memcpy(nfc->regs + NFC_MAIN_AREA(0) + c, buf, l);
> 
> Another 'memcpy(.. nfc->regs);'...
> 
> [snip]
> 
> These memcpy's are the same 'bus' interface as the registers.  We should
> be just as worried about this SRAM buffer memory as the memory mapped
> registers, shouldn't we?  Is a barrier() before reading and a barrier()
> after writing fine for U-Boot?  Personally, I think they are safe as
> only the 'vf610_nfc_set(mtd, NFC_FLASH_CMD2, START_BIT)' needs some

I also think that this is the only place a barrier is really
needed. However, as Scott stated:

On Wed, 2014-08-13 at 22:32, Scott Wood wrote:
> raw_writel() is itself something that should only be used for
> hand-optimized sections.  For non-performance-critical code you should
> use normal writel() so that you don't need to worry about manually
> adding I/O barriers.

The reason I chose readl/writel instead of the raw variants is to
stay aligned with other drivers...


> care.  Maybe a comment is fine?  It seems the Vybrid is safe for
> different access sizes, but it is possible that some other CPU might not
> be able to access this memory via 32/16/8 bit accesses and 'memcpy()'
> may not be appropriate.  It seems that 'natural' size of the NFC
> controller itself is 32bits and the CPU interface does lane masking.
> Ie, boot mode documentation talks about remapping
> 'sram_physical_addr[13:3] = {cpu_addr[11:3],cpu_addr[13:12]}' saying
> that bits 2,1 are not used (hopefully one based numbers).  This is just
> my guess...

What assumptions do you make about how memcpy accesses memory? This latest
patch now uses the optimized versions from the kernel... Maybe they even
try to access 64-bit width (the NIC interconnect supports 64-bit access)

> 
> The VF6xx page has a documentation tab,
>  http://www.freescale.com/webapp/sps/site/prod_summary.jsp?code=VF6xx
> 
> There is an app note, AN4947 'Understanding Vybrid Architecture', which
> describes some timing details for the AHB bus (where this flash
> controller is connected).  Pg21 Table 7 of that document gives some
> measurements.  The QSPI is a similar peripheral on the AHB.  The first
> and second lines give accesses of 4408 and subsequent accesses are 2770
> Cortex-A5 clocks.  Normal SDRAM is 258 and 8 clocks.  Ie, it is quite
> important in places to minimize accesses and try to make them
> sequential.

We also have caches, hence I don't think the access will take that long.
And on the other side, the SRAM is much faster.

> 
> However, it looks like most U-Boot NAND drivers use the memcpy()?   With
> the exceptions of fsl_elbc_nand.c, fsl_ifc_nand.c, and mpc5121_nfc.c.
> Maybe it doesn't matter...



--
Stefan
Bill Pringlemeir Aug. 18, 2014, 4:38 p.m. UTC | #5
On 18 Aug 2014, stefan@agner.ch wrote:

> Am 2014-08-14 23:12, schrieb Bill Pringlemeir:
>>> On 14 Aug 2014, stefan@agner.ch wrote:
>>>
>>> This adds initial support for Freescale NFC (NAND Flash Controller)
>>> found in ARM Vybrid SoC's, Power Architecture MPC5125 and others.
>>> However, this driver is only tested on Vybrid.
>>
>> This is only to expand on the nand controller register and SRAM use.
>>
>> [snip]
>>
>>> diff --git a/drivers/mtd/nand/vf610_nfc.c
>>> b/drivers/mtd/nand/vf610_nfc.c new file mode 100644 index
>>> 0000000..3150ac1 --- /dev/null +++ b/drivers/mtd/nand/vf610_nfc.c
>>
>> [snip]
>>
>>> +static inline u32 vf610_nfc_read(struct mtd_info *mtd, uint reg) +{

>> Ok, we always use readl/writel.  This is fine, but a little slower
>> and bigger.  I may try a register cache if I resubmit to the Linux
>> MTD as per Scott's suggestion.  Especially, this version is good for
>> an incremental patch.

> I measured the difference and get 1MB/s
> Full pages, readl/writel:
> NAND read: device 0 offset 0x200000, size 0x800000
> 8388608 bytes read in 772 ms (10.4 MiB/s): OK

> Full pages, __raw_readl/__raw_writel
> NAND read: device 0 offset 0x200000, size 0x800000
> 8388608 bytes read in 696 ms (11.5 MiB/s): OK

> Ok, this is actually quite a lot. Especially since I already optimized
> the C code (by not using the helper functions like nfc_set/nfc_clear
> in vf610_nfc_send_command), one would think there is now almost no
> optimization potential. I looked into the disassembled code and could
> narrow down the issue. Due to the memory barriers, all offsets were
> calculated on each register access (nfc base to reg base, and add reg
> offset), multiple instances of:

>  20:	e59cc120 	ldr	ip, [ip, #288]	; 0x120
>  24:	e59cc134 	ldr	ip, [ip, #308]	; 0x134

> I optimized the code again and calculate the offsets manually and
> access __raw_readl/__raw_writel rather then vf610_nfc_read/write in
> the vf610_nfc_send_command(s) function, I get the full speed again:

> NAND read: device 0 offset 0x200000, size 0x800000
> 8388608 bytes read in 687 ms (11.6 MiB/s): OK

I think what you have is fine.  The 10MB/s versus 11MB/s is not
insignificant, but it is not a huge difference.  I expect that the
driver is already better than the others; especially the Imx-25 was only
7.7MB/s read and 4.6MB/s write from Linux mtd tests.  Although the end
user might like to wait 10% less for an image to load.

Also, probably a lot of people care about code size.  This is not so
much a case for U-Boot as it is usually machine specific and doesn't
support several SOCs afaik.

However, the more specific you make the optimization to the platform,
the less likely it is to extend well.  We also wish to have this work
well with different gcc versions and CPUs (PowerPC, etc).  The
'readl/writel' handicap the compiler.  Although they are more likely to
work with a wide variety of buses.

[snip]

> On Wed, 2014-08-13 at 22:32, Scott Wood wrote:
>> raw_writel() is itself something that should only be used for
>> hand-optimized sections.  For non-performance-critical code you
>> should use normal writel() so that you don't need to worry about
>> manually adding I/O barriers.
>
> The reason I choosed readl/writel instead of the raw variants is to
> preserve align with other drivers...

>> care.  Maybe a comment is fine?  It seems the Vybrid is safe for
>> different access sizes, but it is possible that some other CPU might
>> not be able to access this memory via 32/16/8 bit accesses and
>> 'memcpy()' may not be appropriate.  It seems that 'natural' size of
>> the NFC controller itself is 32bits and the CPU interface does lane
>> masking.  Ie, boot mode documentation talks about remapping
>> 'sram_physical_addr[13:3] = {cpu_addr[11:3],cpu_addr[13:12]}' saying
>> that bits 2,1 are not used (hopefully one based numbers).  This is
>> just my guess...

> What assumptions do you make how memcpy accesses memory? This latest
> patch now uses the optimized versions from the kernel... Maybe they
> even try to access 64-bit width (the NIC interconnect supports 64-bit
> access)

The memcpy() itself could use anything. 64bits is possible on AXI/NIC.
The 'PBRIDGE' is 64bit, but I think the AIPS/IPS (apparently AIPS means
'AHB-lite to IPS') are 32bit.  At least that is the case on the Imx25
which has a different AIPS version.  I assumed the 'memcpy()' was using
32bits but this certainly isn't explicit in the code.

The majority of the register banks are non-volatile with this
controller.  Instead of running multiple NAND programming sequences, the
controller runs them all for us.  Most registers are mainly like
SRAM.

My only point is that the SRAM buffers use the same interface as the
main Nand controller register banks.  So using 'readl/writel' for the
register, but not the SRAM buffers seems inconsistent.

So to address this inconsistency, I was thinking that we should at least
have a comment?

 /* BUS: This assumes the BUS is 32 bit accessible.  If you are porting
    to other systems, this may not be the case.
  */
 memcpy(nfc->regs + NFC_MAIN_AREA(0) + c, buf, l);

Or we could implement our own version of memcpy that did 32bit aligned
transfers with a similar comment.  In theory, we need a barrier after
the memcpy(), in case anyone modified the code to touch the
NFC_FLASH_CMD2's START_BIT directly after the memcpy() or custom
function.  But all the paranoia adds some code and has potential to slow
things down.

Doing a barrier after every single byte read as the ARM Linux's
memcpy_fromio() does will surely make significant performance
differences.  Instead of being double the Imx25, it was half.  A user
waiting 400% longer won't be too happy.

Fwiw,
Bill Pringlemeir.
Stefan Agner Aug. 19, 2014, 5 p.m. UTC | #6
Am 2014-08-18 18:38, schrieb Bill Pringlemeir:
> On 18 Aug 2014, stefan@agner.ch wrote:
> 
>> Am 2014-08-14 23:12, schrieb Bill Pringlemeir:
>>>> On 14 Aug 2014, stefan@agner.ch wrote:
>>>>
>>>> This adds initial support for Freescale NFC (NAND Flash Controller)
>>>> found in ARM Vybrid SoC's, Power Architecture MPC5125 and others.
>>>> However, this driver is only tested on Vybrid.
>>>
>>> This is only to expand on the nand controller register and SRAM use.
>>>
>>> [snip]
>>>
>>>> diff --git a/drivers/mtd/nand/vf610_nfc.c
>>>> b/drivers/mtd/nand/vf610_nfc.c new file mode 100644 index
>>>> 0000000..3150ac1 --- /dev/null +++ b/drivers/mtd/nand/vf610_nfc.c
>>>
>>> [snip]
>>>
>>>> +static inline u32 vf610_nfc_read(struct mtd_info *mtd, uint reg) +{
> 
>>> Ok, we always use readl/writel.  This is fine, but a little slower
>>> and bigger.  I may try a register cache if I resubmit to the Linux
>>> MTD as per Scott's suggestion.  Especially, this version is good for
>>> an incremental patch.
> 
>> I measured the difference and get 1MB/s
>> Full pages, readl/writel:
>> NAND read: device 0 offset 0x200000, size 0x800000
>> 8388608 bytes read in 772 ms (10.4 MiB/s): OK
> 
>> Full pages, __raw_readl/__raw_writel
>> NAND read: device 0 offset 0x200000, size 0x800000
>> 8388608 bytes read in 696 ms (11.5 MiB/s): OK
> 
>> Ok, this is actually quite a lot. Especially since I already optimized
>> the C code (by not using the helper functions like nfc_set/nfc_clear
>> in vf610_nfc_send_command), one would think there is now almost no
>> optimization potential. I looked into the disassembled code and could
>> narrow down the issue. Due to the memory barriers, all offsets were
>> calculated on each register access (nfc base to reg base, and add reg
>> offset), multiple instances of:
> 
>>  20:	e59cc120 	ldr	ip, [ip, #288]	; 0x120
>>  24:	e59cc134 	ldr	ip, [ip, #308]	; 0x134
> 
>> I optimized the code again and calculate the offsets manually and
>> access __raw_readl/__raw_writel rather then vf610_nfc_read/write in
>> the vf610_nfc_send_command(s) function, I get the full speed again:
> 
>> NAND read: device 0 offset 0x200000, size 0x800000
>> 8388608 bytes read in 687 ms (11.6 MiB/s): OK
> 
> I think what you have is fine.  The 10BM/s versus 11MB/s is not
> insignificant, but it is not a huge difference.  I expect that the
> driver is already better than the others; especially the Imx-25 was only
> 7.7MB/s read and 4.6Mb/s write from Linux mtd tests.  Although the end
> user might like to wait 10% less for an image to load.
> 
> Also, probably a lot of people care about code size.  This is not so
> much a case for U-Boot as it is usually machine specific and doesn't
> support several SOCs afaik.
> 
> However, the more specific you make the optimization to the platform,
> the less likely it is to extend well.  We also wish to have this work
> well with different gcc versions and CPUs (PowerPC, etc).  The
> 'readl/writel' handicap the compiler.  Although they are more likely to
> work with a wide variety of buses.
> 
> [snip]
> 
>> On Wed, 2014-08-13 at 22:32, Scott Wood wrote:
>>> raw_writel() is itself something that should only be used for
>>> hand-optimized sections.  For non-performance-critical code you
>>> should use normal writel() so that you don't need to worry about
>>> manually adding I/O barriers.
>>
>> The reason I choosed readl/writel instead of the raw variants is to
>> preserve align with other drivers...
> 
>>> care.  Maybe a comment is fine?  It seems the Vybrid is safe for
>>> different access sizes, but it is possible that some other CPU might
>>> not be able to access this memory via 32/16/8 bit accesses and
>>> 'memcpy()' may not be appropriate.  It seems that 'natural' size of
>>> the NFC controller itself is 32bits and the CPU interface does lane
>>> masking.  Ie, boot mode documentation talks about remapping
>>> 'sram_physical_addr[13:3] = {cpu_addr[11:3],cpu_addr[13:12]}' saying
>>> that bits 2,1 are not used (hopefully one based numbers).  This is
>>> just my guess...
> 
>> What assumptions do you make how memcpy accesses memory? This latest
>> patch now uses the optimized versions from the kernel... Maybe they
>> even try to access 64-bit width (the NIC interconnect supports 64-bit
>> access)
> 
> The memcpy() itself could use anything. 64bits is possible on AXI/NIC.
> The 'PBRIDGE' is 64bit, but I think the AIPS/IPS (apparently AIPS means
> 'AHB-lite to IPS) are 32bit.  At least that is the case on the Imx25
> which has a different AIPS version.  I assumed the 'memcpy()' was using
> 32bits but this certainly isn't explicit in the code.
> 
> The majority of the register banks are non-volatile with this
> controller.  Instead of running multiple NAND programming sequences, the
> controller runs them all for us.  Most registers are are mainly like
> SRAM.
> 
> My only point is that the SRAM buffers use the same interface as the
> main Nand controller register banks.  So using 'readl/writel' for the
> register, but not the SRAM buffers seems inconsistent.
> 
> So to address this inconsistency, I was thinking that we should at least
> have a comment?
> 
>  /* BUS: This assumes the BUS is 32 bit accessible.  If you are porting
>     to other systems, this may not be the case.
>   */
>  memcpy(nfc->regs + NFC_MAIN_AREA(0) + c, buf, l);
> 

IMHO, we just treat this as if it's memory and I guess this is fine for a
buffer. memcpy knows how to copy data, and takes care if the
architecture needs aligned access when reading 32-bit width, or similar
requirements. We do not know whether memcpy really uses 32-bit accesses,
hence this comment might even be wrong. In a short test, I could also
access the buffer in byte/word length (tested using md.b/md.w).

Also, I assume this just works for a different architecture too. If not,
the one using this driver the first time on a different architecture
would see this pretty quickly I guess :-)


> Or we could implement our own version of memcpy that did 32bit aligned
> transfers with a similar comment.  In theory, we need a barrier after
> the memcpy(), in case anyone modified the code to touch the
> NFC_FLASH_CMD2's START_BIT directly after the memcpy() or custom
> function.  But all the paranoia adds some code and has potential to slow
> things down.

Well, that would be a reordering across multiple function reads, and
many instructions right now. I don't think that this is a valid case to
introduce a barrier.

> Doing a barrier after every single byte read as the ARM Linux's
> memcpy_fromio() does will surely make significant performance
> differences.  Instead of being double the Imx25, it was half.  A user
> waiting 400% longer won't be too happy.

There are also patches floating around which just use memcpy:
http://lists.infradead.org/pipermail/linux-arm-kernel/2013-June/173195.html


In our case, a barrier just after the memcpy would be sufficient.

Somehow your comment and my latest patch revision crossed each other.
Could you post your comment on the latest revision in case you are fine
with it?

--
Stefan
Bill Pringlemeir Aug. 21, 2014, 9:15 p.m. UTC | #7
>>>>> On 14 Aug 2014, stefan@agner.ch wrote:

>>>>> This adds initial support for Freescale NFC (NAND Flash
>>>>> Controller) found in ARM Vybrid SoC's, Power Architecture MPC5125
>>>>> and others.  However, this driver is only tested on Vybrid.

>>> On Wed, 2014-08-13 at 22:32, Scott Wood wrote:

>>>> raw_writel() is itself something that should only be used for
>>>> hand-optimized sections.  For non-performance-critical code you
>>>> should use normal writel() so that you don't need to worry about
>>>> manually adding I/O barriers.

>>> Am 2014-08-14 23:12, schrieb Bill Pringlemeir:

[regarding memcpy() in the driver]

>>>> Maybe a comment is fine?  It seems the Vybrid is safe for
>>>> different access sizes, but it is possible that some other CPU
>>>> might not be able to access this memory via 32/16/8 bit accesses
>>>> and 'memcpy()' may not be appropriate.  It seems that 'natural'
>>>> size of the NFC controller itself is 32bits and the CPU interface
>>>> does lane masking.  Ie, boot mode documentation talks about
>>>> remapping 'sram_physical_addr[13:3] =
>>>> {cpu_addr[11:3],cpu_addr[13:12]}' saying that bits 2,1 are not used
>>>> (hopefully one based numbers).  This is just my guess...

>> On 18 Aug 2014, stefan@agner.ch wrote:
>>> What assumptions do you make how memcpy accesses memory? This latest
>>> patch now uses the optimized versions from the kernel... Maybe they
>>> even try to access 64-bit width (the NIC interconnect supports
>>> 64-bit access)

[snip]

> Am 2014-08-18 18:38, schrieb Bill Pringlemeir:

>> My only point is that the SRAM buffers use the same interface as the
>> main Nand controller register banks.  So using 'readl/writel' for the
>> register, but not the SRAM buffers seems inconsistent.

>> So to address this inconsistency, I was thinking that we should at
>> least have a comment?

On 19 Aug 2014, stefan@agner.ch wrote:

> IMHO, we just treat this as if its memory and I guess this is fine for
> a buffer. memcpy knows how to copy data, and takes care if the
> architecture needs aligned access when reading 32-bit width, or
> similar requirements. We do not know whether memcpy really uses 32-bit
> accesses, hence this comment might even be wrong. In a short test, I
> could also access the buffer in byte/word length (tested using
> md.b/md.w).

> Also, I assume this just works for a different architecture too. If
> not, the one using this driver the first time on a different
> architecture would see this pretty quickly I guess :-)

[snip]

> In our case, a barrier just after the memcpy would be sufficient.

I would suggest you make a 'vf610_nfc_memcpy()' [or even from/to
variants if you are pedantic] which can be a wrapper function of just
'memcpy'.  Just like the readl/writel wrappers this will collect the
BUS accesses into one place.  So they are documented for people porting
the code.  Trying to accommodate some future insane hardware hookup seems
futile beyond this?

Otherwise, I will add an 'Ack' or 'Reviewed-By' from me if you like.  I
am sorry, I don't know what if anything is appropriate.

Thanks,
Bill Pringlemeir.
Stefano Babic Sept. 11, 2014, 9:37 a.m. UTC | #8
On 21/08/2014 23:15, Bill Pringlemeir wrote:
> 
>>>>>> On 14 Aug 2014, stefan@agner.ch wrote:
> 
>>>>>> This adds initial support for Freescale NFC (NAND Flash
>>>>>> Controller) found in ARM Vybrid SoC's, Power Architecture MPC5125
>>>>>> and others.  However, this driver is only tested on Vybrid.
> 
>>>> On Wed, 2014-08-13 at 22:32, Scott Wood wrote:
> 
>>>>> raw_writel() is itself something that should only be used for
>>>>> hand-optimized sections.  For non-performance-critical code you
>>>>> should use normal writel() so that you don't need to worry about
>>>>> manually adding I/O barriers.
> 
>>>> Am 2014-08-14 23:12, schrieb Bill Pringlemeir:
> 
> [regarding memcpy() in the driver]
> 
>>>>> Maybe a comment is fine?  It seems the Vybrid is safe for
>>>>> different access sizes, but it is possible that some other CPU
>>>>> might not be able to access this memory via 32/16/8 bit accesses
>>>>> and 'memcpy()' may not be appropriate.  It seems that 'natural'
>>>>> size of the NFC controller itself is 32bits and the CPU interface
>>>>> does lane masking.  Ie, boot mode documentation talks about
>>>>> remapping 'sram_physical_addr[13:3] =
>>>>> {cpu_addr[11:3],cpu_addr[13:12]}' saying that bits 2,1 are not used
>>>>> (hopefully one based numbers).  This is just my guess...
> 
>>> On 18 Aug 2014, stefan@agner.ch wrote:
>>>> What assumptions do you make how memcpy accesses memory? This latest
>>>> patch now uses the optimized versions from the kernel... Maybe they
>>>> even try to access 64-bit width (the NIC interconnect supports
>>>> 64-bit access)
> 
> [snip]
> 
>> Am 2014-08-18 18:38, schrieb Bill Pringlemeir:
> 
>>> My only point is that the SRAM buffers use the same interface as the
>>> main Nand controller register banks.  So using 'readl/writel' for the
>>> register, but not the SRAM buffers seems inconsistent.
> 
>>> So to address this inconsistency, I was thinking that we should at
>>> least have a comment?
> 
> On 19 Aug 2014, stefan@agner.ch wrote:
> 
>> IMHO, we just treat this as if its memory and I guess this is fine for
>> a buffer. memcpy knows how to copy data, and takes care if the
>> architecture needs aligned access when reading 32-bit width, or
>> similar requirements. We do not know whether memcpy really uses 32-bit
>> accesses, hence this comment might even be wrong. In a short test, I
>> could also access the buffer in byte/word length (tested using
>> md.b/md.w).
> 
>> Also, I assume this just works for a different architecture too. If
>> not, the one using this driver the first time on a different
>> architecture would see this pretty quickly I guess :-)
> 
> [snip]
> 
>> In our case, a barrier just after the memcpy would be sufficient.
> 
> I would suggest you make a 'vf610_nfc_memcpy()' [or even from/to
> variants if you are pedantic] which can be a wrapper function of just
> 'memcpy'.  Just like the readl/writel wrappers this will collect the
> BUS accesses into one place.  So they are documented for people porting
> the code.  Trying to accommodate some future insane hardware hookup seems
> futile beyond this?
> 
> Otherwise, I will add an 'Ack' or 'Reviewed-By' from me if you like.  I
> am sorry, I don't know what if anything is appropriate.

Both are appropriate. IMHO you are an author and you checked the code
after Stefan's porting: ACK should be the best choice.

Best regards,
Stefano Babic
diff mbox

Patch

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index bf1312a..eef86d1 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -51,6 +51,7 @@  obj-$(CONFIG_NAND_KB9202) += kb9202_nand.o
 obj-$(CONFIG_NAND_KIRKWOOD) += kirkwood_nand.o
 obj-$(CONFIG_NAND_KMETER1) += kmeter1_nand.o
 obj-$(CONFIG_NAND_MPC5121_NFC) += mpc5121_nfc.o
+obj-$(CONFIG_NAND_VF610_NFC) += vf610_nfc.o
 obj-$(CONFIG_NAND_MXC) += mxc_nand.o
 obj-$(CONFIG_NAND_MXS) += mxs_nand.o
 obj-$(CONFIG_NAND_NDFC) += ndfc.o
diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
new file mode 100644
index 0000000..3150ac1
--- /dev/null
+++ b/drivers/mtd/nand/vf610_nfc.c
@@ -0,0 +1,706 @@ 
+/*
+ * Copyright 2009-2014 Freescale Semiconductor, Inc. and others
+ *
+ * Description: MPC5125, VF610, MCF54418 and Kinetis K70 Nand driver.
+ * Ported to U-Boot by Stefan Agner
+ * Based on RFC driver posted on Kernel Mailing list by Bill Pringlemeir
+ * Jason ported to M54418TWR and MVFA5.
+ * Authors: Stefan Agner <stefan.agner@toradex.com>
+ *          Bill Pringlemeir <bpringlemeir@nbsps.com>
+ *          Shaohui Xie <b21989@freescale.com>
+ *          Jason Jin <Jason.jin@freescale.com>
+ *
+ * Based on original driver mpc5121_nfc.c.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Limitations:
+ * - Untested on MPC5125 and M54418.
+ * - DMA not used.
+ * - 2K pages or less.
+ * - Only 2K page w. 64+OOB and hardware ECC.
+ */
+
+#include <common.h>
+#include <malloc.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+
+#include <nand.h>
+#include <errno.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#define	DRV_NAME		"fsl_nfc"
+
+/* Register Offsets */
+#define NFC_FLASH_CMD1			0x3F00
+#define NFC_FLASH_CMD2			0x3F04
+#define NFC_COL_ADDR			0x3F08
+#define NFC_ROW_ADDR			0x3F0c
+#define NFC_ROW_ADDR_INC		0x3F14
+#define NFC_FLASH_STATUS1		0x3F18
+#define NFC_FLASH_STATUS2		0x3F1c
+#define NFC_CACHE_SWAP			0x3F28
+#define NFC_SECTOR_SIZE			0x3F2c
+#define NFC_FLASH_CONFIG		0x3F30
+#define NFC_IRQ_STATUS			0x3F38
+
+/* Addresses for NFC MAIN RAM BUFFER areas */
+#define NFC_MAIN_AREA(n)		((n) *  0x1000)
+
+#define PAGE_2K				0x0800
+#define OOB_64				0x0040
+
+/*
+ * NFC_CMD2[CODE] values. See section:
+ *  - 31.4.7 Flash Command Code Description, Vybrid manual
+ *  - 23.8.6 Flash Command Sequencer, MPC5125 manual
+ *
+ * Briefly these are bitmasks of controller cycles.
+ */
+#define READ_PAGE_CMD_CODE		0x7EE0
+#define PROGRAM_PAGE_CMD_CODE		0x7FC0
+#define ERASE_CMD_CODE			0x4EC0
+#define READ_ID_CMD_CODE		0x4804
+#define RESET_CMD_CODE			0x4040
+#define STATUS_READ_CMD_CODE		0x4068
+
+/* NFC ECC mode define */
+#define ECC_BYPASS			0
+#define ECC_45_BYTE			6
+
+/*** Register Mask and bit definitions */
+
+/* NFC_FLASH_CMD1 Field */
+#define CMD_BYTE2_MASK				0xFF000000
+#define CMD_BYTE2_SHIFT				24
+
+/* NFC_FLASH_CMD2 Field */
+#define CMD_BYTE1_MASK				0xFF000000
+#define CMD_BYTE1_SHIFT				24
+#define CMD_CODE_MASK				0x00FFFF00
+#define CMD_CODE_SHIFT				8
+#define BUFNO_MASK				0x00000006
+#define BUFNO_SHIFT				1
+#define START_BIT				(1<<0)
+
+/* NFC_COL_ADDR Field */
+#define COL_ADDR_MASK				0x0000FFFF
+#define COL_ADDR_SHIFT				0
+
+/* NFC_ROW_ADDR Field */
+#define ROW_ADDR_MASK				0x00FFFFFF
+#define ROW_ADDR_SHIFT				0
+#define ROW_ADDR_CHIP_SEL_RB_MASK		0xF0000000
+#define ROW_ADDR_CHIP_SEL_RB_SHIFT		28
+#define ROW_ADDR_CHIP_SEL_MASK			0x0F000000
+#define ROW_ADDR_CHIP_SEL_SHIFT			24
+
+/* NFC_FLASH_STATUS2 Field */
+#define STATUS_BYTE1_MASK			0x000000FF
+
+/* NFC_FLASH_CONFIG Field */
+#define CONFIG_ECC_SRAM_ADDR_MASK		0x7FC00000
+#define CONFIG_ECC_SRAM_ADDR_SHIFT		22
+#define CONFIG_ECC_SRAM_REQ_BIT			(1<<21)
+#define CONFIG_DMA_REQ_BIT			(1<<20)
+#define CONFIG_ECC_MODE_MASK			0x000E0000
+#define CONFIG_ECC_MODE_SHIFT			17
+#define CONFIG_FAST_FLASH_BIT			(1<<16)
+#define CONFIG_16BIT				(1<<7)
+#define CONFIG_BOOT_MODE_BIT			(1<<6)
+#define CONFIG_ADDR_AUTO_INCR_BIT		(1<<5)
+#define CONFIG_BUFNO_AUTO_INCR_BIT		(1<<4)
+#define CONFIG_PAGE_CNT_MASK			0xF
+#define CONFIG_PAGE_CNT_SHIFT			0
+
+/* NFC_IRQ_STATUS Field */
+#define IDLE_IRQ_BIT				(1<<29)
+#define IDLE_EN_BIT				(1<<20)
+#define CMD_DONE_CLEAR_BIT			(1<<18)
+#define IDLE_CLEAR_BIT				(1<<17)
+
+#define NFC_TIMEOUT	(1000)
+
+/*
+ * ECC status placed at end of buffers.
+ *
+ * ECC_SRAM_ADDR is expressed in 8-byte units: it is programmed into the
+ * ECC_SRAM_ADDR field of NFC_FLASH_CONFIG and multiplied by 8 again when
+ * the status byte is read back from the buffer.
+ */
+#define ECC_SRAM_ADDR	((PAGE_2K+256-8) >> 3)
+/* Bit tested to decide whether the ECC engine gave up on the page. */
+#define ECC_STATUS_MASK	0x80
+/* Low six bits of the status byte: error count reported by the ECC engine. */
+#define ECC_ERR_COUNT	0x3F
+
+/*
+ * ECC status is stored at NFC_CFG[ECCADD] +4 for little-endian
+ * and +7 for big-endian SOC.
+ *
+ * NOTE(review): every non-VF610 build gets the big-endian offset here;
+ * confirm this holds for each SoC the driver is enabled on.
+ */
+#ifdef CONFIG_VF610
+#define ECC_OFFSET	4
+#else
+#define ECC_OFFSET	7
+#endif
+
+/*
+ * Driver state for one NFC instance. Allocated in vf610_nfc_nand_init()
+ * and reached from an mtd_info via mtd->priv (the nand_chip) and then
+ * chip->priv (see mtd_to_nfc()).
+ */
+struct vf610_nfc {
+	/* Not assigned or read anywhere in this file. */
+	struct mtd_info	  *mtd;
+	/* Embedded NAND chip; chip.priv points back at this structure. */
+	struct nand_chip   chip;
+	/* Only passed to dev_err(); never assigned in this file. */
+	struct device	  *dev;
+	/* Base of the NFC window: SRAM page buffer first, registers at 0x3Fxx. */
+	void __iomem	  *regs;
+	/* Byte cursor into the page buffer, reset by every command. */
+	uint               column;
+	/* Set for NAND_CMD_READOOB: read_buf skips the main area. */
+	int                spareonly;
+	/* Page currently held in the SRAM buffer; -1 once it is invalidated. */
+	int                page;
+	/* Status and ID are in alternate locations. */
+	int                alt_buf;
+#define ALT_BUF_ID   1
+#define ALT_BUF_STAT 2
+	/* Not used in this file. */
+	struct clk        *clk;
+};
+
+/*
+ * Map an mtd_info to the driver state: mtd->priv is the nand_chip and
+ * chip->priv is the vf610_nfc.  The expansion is fully parenthesized and
+ * carries no trailing semicolon, so it is usable inside any expression.
+ */
+#define mtd_to_nfc(_mtd) \
+	((struct vf610_nfc *)((struct nand_chip *)(_mtd)->priv)->priv)
+
+/* Four-byte signatures of the main and mirror bad block tables. */
+static u8 bbt_pattern[] = {'B', 'b', 't', '0' };
+static u8 mirror_pattern[] = {'1', 't', 'b', 'B' };
+
+/*
+ * Descriptor of the main flash-based bad block table.
+ *
+ * The signature (offs 11, len 4) and the version byte (veroffs 15) both
+ * fall inside the OOB range 8..18 that vf610_nfc_ecc45 declares as free.
+ */
+static struct nand_bbt_descr bbt_main_descr = {
+	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE |
+		   NAND_BBT_2BIT | NAND_BBT_VERSION,
+	.offs =	11,
+	.len = 4,
+	.veroffs = 15,
+	.maxblocks = 4,
+	.pattern = bbt_pattern,
+};
+
+/* Descriptor of the mirror table: same layout, different signature. */
+static struct nand_bbt_descr bbt_mirror_descr = {
+	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE |
+		   NAND_BBT_2BIT | NAND_BBT_VERSION,
+	.offs =	11,
+	.len = 4,
+	.veroffs = 15,
+	.maxblocks = 4,
+	.pattern = mirror_pattern,
+};
+
+/*
+ * OOB layout used with the 45-byte hardware ECC mode: the ECC bytes occupy
+ * OOB offsets 19..63 (45 positions) and offsets 8..18 are left free.
+ * Offsets 0..7 are in neither list.
+ */
+static struct nand_ecclayout vf610_nfc_ecc45 = {
+	.eccbytes = 45,
+	.eccpos = {19, 20, 21, 22, 23,
+		   24, 25, 26, 27, 28, 29, 30, 31,
+		   32, 33, 34, 35, 36, 37, 38, 39,
+		   40, 41, 42, 43, 44, 45, 46, 47,
+		   48, 49, 50, 51, 52, 53, 54, 55,
+		   56, 57, 58, 59, 60, 61, 62, 63},
+	.oobfree = {
+		{.offset = 8,
+		 .length = 11} }
+};
+
+/* Read the 32-bit NFC register located 'reg' bytes past the mapped base. */
+static inline u32 vf610_nfc_read(struct mtd_info *mtd, uint reg)
+{
+	struct vf610_nfc *ctrl = mtd_to_nfc(mtd);
+	void __iomem *addr = ctrl->regs + reg;
+
+	return readl(addr);
+}
+
+/* Store 'val' into the 32-bit NFC register 'reg' bytes past the mapped base. */
+static inline void vf610_nfc_write(struct mtd_info *mtd, uint reg, u32 val)
+{
+	struct vf610_nfc *ctrl = mtd_to_nfc(mtd);
+	void __iomem *addr = ctrl->regs + reg;
+
+	writel(val, addr);
+}
+
+/* Read-modify-write: switch on 'bits' in register 'reg'. */
+static inline void vf610_nfc_set(struct mtd_info *mtd, uint reg, u32 bits)
+{
+	u32 current_val = vf610_nfc_read(mtd, reg);
+
+	vf610_nfc_write(mtd, reg, current_val | bits);
+}
+
+/* Read-modify-write: switch off 'bits' in register 'reg'. */
+static inline void vf610_nfc_clear(struct mtd_info *mtd, uint reg, u32 bits)
+{
+	u32 current_val = vf610_nfc_read(mtd, reg);
+
+	vf610_nfc_write(mtd, reg, current_val & ~bits);
+}
+
+/*
+ * Replace one bit field of register 'reg': the bits in 'mask' are cleared
+ * and 'val' shifted left by 'shift' is OR-ed in.  'val' is not masked, so
+ * the caller must pass a value that fits the field.
+ */
+static inline void vf610_nfc_set_field(struct mtd_info *mtd, u32 reg,
+				       u32 mask, u32 shift, u32 val)
+{
+	u32 kept = vf610_nfc_read(mtd, reg) & ~mask;
+	u32 field = val << shift;
+
+	vf610_nfc_write(mtd, reg, kept | field);
+}
+
+/*
+ * Clear flags for upcoming command: write the command-done and idle
+ * clear bits into NFC_IRQ_STATUS, keeping the other bits as read.
+ */
+static inline void vf610_nfc_clear_status(struct mtd_info *mtd)
+{
+	vf610_nfc_set(mtd, NFC_IRQ_STATUS,
+		      CMD_DONE_CLEAR_BIT | IDLE_CLEAR_BIT);
+}
+
+/*
+ * Start the command programmed into NFC_FLASH_CMD2 and wait for it to
+ * complete.
+ *
+ * Sets START_BIT, then polls NFC_IRQ_STATUS until IDLE_IRQ_BIT is seen.
+ * If get_timer() reports more than NFC_TIMEOUT elapsed, a message is
+ * printed and the function returns without clearing the status flags;
+ * the caller gets no error indication.
+ */
+static inline void vf610_nfc_done(struct mtd_info *mtd)
+{
+	uint start;
+
+	/* Kick off the command sequence. */
+	vf610_nfc_set(mtd, NFC_FLASH_CMD2, START_BIT);
+	barrier();
+
+	start = get_timer(0);
+
+	while (!(vf610_nfc_read(mtd, NFC_IRQ_STATUS) & IDLE_IRQ_BIT)) {
+		if (get_timer(start) > NFC_TIMEOUT) {
+			printf("Timeout while waiting for !BUSY.\n");
+			return;
+		}
+	}
+	/* Completed: acknowledge the done/idle flags. */
+	vf610_nfc_clear_status(mtd);
+}
+
+/*
+ * Return one byte of the READ ID response.  Bytes 0..3 come from
+ * NFC_FLASH_STATUS1, with byte 0 in the most significant position; any
+ * col >= 4 yields the top byte of NFC_FLASH_STATUS2.
+ */
+static u8 vf610_nfc_get_id(struct mtd_info *mtd, int col)
+{
+	u32 id_word;
+
+	if (col >= 4) {
+		id_word = vf610_nfc_read(mtd, NFC_FLASH_STATUS2);
+		return id_word >> 24;
+	}
+
+	id_word = vf610_nfc_read(mtd, NFC_FLASH_STATUS1);
+	return (id_word >> (3 - col) * 8) & 0xff;
+}
+
+/* Return the status byte held in the low 8 bits of NFC_FLASH_STATUS2. */
+static u8 vf610_nfc_get_status(struct mtd_info *mtd)
+{
+	u32 status2 = vf610_nfc_read(mtd, NFC_FLASH_STATUS2);
+
+	return status2 & STATUS_BYTE1_MASK;
+}
+
+/*
+ * Single command: clear stale status flags, then program the first command
+ * byte and the command code into NFC_FLASH_CMD2 with the buffer number
+ * field zeroed.  START_BIT is not set here; vf610_nfc_done() does that.
+ */
+static void vf610_nfc_send_command(struct mtd_info *mtd, u32 cmd_byte1,
+				   u32 cmd_code)
+{
+	u32 cmd2;
+
+	vf610_nfc_clear_status(mtd);
+
+	cmd2 = vf610_nfc_read(mtd, NFC_FLASH_CMD2);
+	cmd2 &= ~(CMD_BYTE1_MASK | CMD_CODE_MASK | BUFNO_MASK);
+	cmd2 |= (cmd_byte1 << CMD_BYTE1_SHIFT) | (cmd_code << CMD_CODE_SHIFT);
+	vf610_nfc_write(mtd, NFC_FLASH_CMD2, cmd2);
+}
+
+/*
+ * Two commands: program NFC_FLASH_CMD2 as for a single command, then place
+ * the second command byte in the top byte of NFC_FLASH_CMD1.
+ */
+static void vf610_nfc_send_commands(struct mtd_info *mtd, u32 cmd_byte1,
+			      u32 cmd_byte2, u32 cmd_code)
+{
+	u32 cmd1;
+
+	vf610_nfc_send_command(mtd, cmd_byte1, cmd_code);
+
+	cmd1 = vf610_nfc_read(mtd, NFC_FLASH_CMD1) & ~CMD_BYTE2_MASK;
+	cmd1 |= cmd_byte2 << CMD_BYTE2_SHIFT;
+	vf610_nfc_write(mtd, NFC_FLASH_CMD1, cmd1);
+}
+
+/*
+ * Program the column and/or row (page) address registers for the next
+ * command.  A value of -1 leaves the corresponding register untouched.
+ *
+ * On a 16-bit wide device the column is halved before it is written.  The
+ * bus width flag must be tested with '&': OR-ing it in is always non-zero
+ * and would halve the column on 8-bit devices as well.
+ */
+static void vf610_nfc_addr_cycle(struct mtd_info *mtd, int column, int page)
+{
+	if (column != -1) {
+		struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+		if (nfc->chip.options & NAND_BUSWIDTH_16)
+			column = column / 2;
+		vf610_nfc_set_field(mtd, NFC_COL_ADDR, COL_ADDR_MASK,
+			      COL_ADDR_SHIFT, column);
+	}
+	if (page != -1)
+		vf610_nfc_set_field(mtd, NFC_ROW_ADDR, ROW_ADDR_MASK,
+				ROW_ADDR_SHIFT, page);
+}
+
+/*
+ * Send command to NAND chip.
+ *
+ * Installed as chip->cmdfunc. Every call resets the buffer cursor state
+ * (column, spareonly, alt_buf), programs the NFC command registers for the
+ * requested operation and then starts it and waits via vf610_nfc_done().
+ * Commands not listed in the switch return without touching the hardware.
+ */
+static void vf610_nfc_command(struct mtd_info *mtd, unsigned command,
+			      int column, int page)
+{
+	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+
+	/* A column of -1 ("no column") starts the cursor at 0. */
+	nfc->column     = max(column, 0);
+	nfc->spareonly	= 0;
+	nfc->alt_buf	= 0;
+
+	switch (command) {
+	case NAND_CMD_PAGEPROG:
+		/* The buffer no longer matches what a read would return. */
+		nfc->page = -1;
+		vf610_nfc_send_commands(mtd, NAND_CMD_SEQIN,
+			     command, PROGRAM_PAGE_CMD_CODE);
+		vf610_nfc_addr_cycle(mtd, column, page);
+		break;
+
+	case NAND_CMD_RESET:
+		vf610_nfc_send_command(mtd, command, RESET_CMD_CODE);
+		break;
+	/*
+	 * NFC does not support sub-page reads and writes,
+	 * so emulate them using full page transfers.
+	 */
+	case NAND_CMD_READOOB:
+		nfc->spareonly = 1;
+		/* fallthrough */
+	case NAND_CMD_SEQIN: /* Pre-read for partial writes. */
+	case NAND_CMD_READ0:
+		/* Always fetch from column 0; nfc->column keeps the cursor. */
+		column = 0;
+		/* Already read? */
+		if (nfc->page == page)
+			return;
+		nfc->page = page;
+		vf610_nfc_send_commands(mtd, NAND_CMD_READ0,
+				  NAND_CMD_READSTART, READ_PAGE_CMD_CODE);
+		vf610_nfc_addr_cycle(mtd, column, page);
+		break;
+
+	case NAND_CMD_ERASE1:
+		/* Drop the cached page if it is the one named by the erase. */
+		if (nfc->page == page)
+			nfc->page = -1;
+		vf610_nfc_send_commands(mtd, command,
+				  NAND_CMD_ERASE2, ERASE_CMD_CODE);
+		vf610_nfc_addr_cycle(mtd, column, page);
+		break;
+
+	case NAND_CMD_READID:
+		/* Subsequent reads come from the ID registers. */
+		nfc->alt_buf = ALT_BUF_ID;
+		vf610_nfc_send_command(mtd, command, READ_ID_CMD_CODE);
+		break;
+
+	case NAND_CMD_STATUS:
+		/* Subsequent reads come from the status register. */
+		nfc->alt_buf = ALT_BUF_STAT;
+		vf610_nfc_send_command(mtd, command, STATUS_READ_CMD_CODE);
+		break;
+	default:
+		return;
+	}
+
+	vf610_nfc_done(mtd);
+}
+
+/*
+ * Copy up to 'len' bytes (at most mtd->oobsize) from the start of the spare
+ * area, which sits right after the main area in the NFC buffer.
+ */
+static inline void vf610_nfc_read_spare(struct mtd_info *mtd, void *buf,
+					int len)
+{
+	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+	int count = min(mtd->oobsize, (uint)len);
+
+	if (count <= 0)
+		return;
+
+	memcpy(buf, nfc->regs + mtd->writesize, count);
+}
+
+/*
+ * Read data from NFC buffers.
+ *
+ * Serves 'len' bytes starting at the cursor nfc->column and advances the
+ * cursor. After NAND_CMD_READID / NAND_CMD_STATUS (alt_buf set) one byte
+ * is taken from the ID or status registers instead of the SRAM buffer.
+ * Whatever is not satisfied by the main area is handed to
+ * vf610_nfc_read_spare().
+ */
+static void vf610_nfc_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+	uint c = nfc->column;
+	uint l;
+
+	/* Handle main area */
+	if (!nfc->spareonly) {
+
+		/*
+		 * Bytes still available in the main area. The subtraction is
+		 * unsigned, so it wraps to a large value once the cursor is
+		 * past writesize and 'l' then equals 'len'.
+		 */
+		l = min((uint)len, mtd->writesize - c);
+		nfc->column += l;
+
+		if (!nfc->alt_buf)
+			memcpy(buf, nfc->regs + NFC_MAIN_AREA(0) + c, l);
+		else
+			/* Exactly one byte is stored, whatever 'l' is. */
+			if (nfc->alt_buf & ALT_BUF_ID)
+				*buf = vf610_nfc_get_id(mtd, c);
+			else
+				*buf = vf610_nfc_get_status(mtd);
+
+		buf += l;
+		len -= l;
+	}
+
+	/* Handle spare area access */
+	if (len) {
+		nfc->column += len;
+		/* Always copies from the start of the spare area. */
+		vf610_nfc_read_spare(mtd, buf, len);
+	}
+}
+
+/*
+ * Write data to NFC buffers: copy up to 'len' bytes into the page buffer at
+ * the current cursor, clipped to the end of main + spare, and advance the
+ * cursor by the number of bytes copied.
+ */
+static void vf610_nfc_write_buf(struct mtd_info *mtd, const u_char *buf,
+				int len)
+{
+	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+	uint start = nfc->column;
+	uint count = min((uint)len, mtd->writesize + mtd->oobsize - start);
+
+	memcpy(nfc->regs + NFC_MAIN_AREA(0) + start, buf, count);
+	nfc->column += count;
+}
+
+/* Read byte from NFC buffers: a one-byte vf610_nfc_read_buf(). */
+static u8 vf610_nfc_read_byte(struct mtd_info *mtd)
+{
+	u8 byte;
+
+	vf610_nfc_read_buf(mtd, &byte, sizeof(byte));
+
+	return byte;
+}
+
+/*
+ * Read word from NFC buffers: a two-byte vf610_nfc_read_buf().
+ *
+ * The result starts out as zero because vf610_nfc_read_buf() stores only a
+ * single byte while an ID/status read is pending; without the initializer
+ * the other byte of the returned word would be an indeterminate stack value.
+ */
+static u16 vf610_nfc_read_word(struct mtd_info *mtd)
+{
+	u16 tmp = 0;
+
+	vf610_nfc_read_buf(mtd, (u_char *)&tmp, sizeof(tmp));
+	return tmp;
+}
+
+/*
+ * dev_ready hook.  If not provided, upper layers apply a fixed delay, so
+ * one is supplied even though it has nothing to poll.
+ */
+static int vf610_nfc_dev_ready(struct mtd_info *mtd)
+{
+	/* The NFC tracks R/B itself, so report "ready" unconditionally. */
+	return 1;
+}
+
+/*
+ * Select a chip by rewriting the chip-select fields of NFC_ROW_ADDR.
+ * Chip 0 and chip 1 map to CS values 1 and 2; any other value leaves the
+ * CS field cleared.  The RB field is always set to 1.
+ *
+ * This function supports Vybrid only (MPC5125 would have full RB and four
+ * CS); on other builds it compiles to nothing.
+ */
+static void vf610_nfc_select_chip(struct mtd_info *mtd, int chip)
+{
+#ifdef CONFIG_VF610
+	u32 row = vf610_nfc_read(mtd, NFC_ROW_ADDR);
+
+	row &= ~(ROW_ADDR_CHIP_SEL_RB_MASK | ROW_ADDR_CHIP_SEL_MASK);
+	row |= 1 << ROW_ADDR_CHIP_SEL_RB_SHIFT;
+
+	switch (chip) {
+	case 0:
+		row |= 1 << ROW_ADDR_CHIP_SEL_SHIFT;
+		break;
+	case 1:
+		row |= 2 << ROW_ADDR_CHIP_SEL_SHIFT;
+		break;
+	default:
+		break;
+	}
+
+	vf610_nfc_write(mtd, NFC_ROW_ADDR, row);
+#endif
+}
+
+/*
+ * Count the 0 bits in buff, one 32-bit word at a time, giving up as soon
+ * as the running total exceeds max_bits.  Only the first size / 4 whole
+ * words are looked at; the returned total may overshoot max_bits.
+ */
+static inline int count_written_bits(uint8_t *buff, int size, int max_bits)
+{
+	const uint32_t *word = (uint32_t *)buff;
+	int remaining = size / 4;
+	int zeros = 0;
+
+	while (remaining-- > 0) {
+		zeros += hweight32(~*word++);
+		if (zeros > max_bits)
+			break;
+	}
+
+	return zeros;
+}
+
+/*
+ * Evaluate the hardware ECC result for the page data in 'dat'.
+ *
+ * Returns the error count reported by the controller when the failure bit
+ * of the ECC status byte is clear, 0 when the page is judged to be erased
+ * (in which case 'dat' is overwritten with 0xff), and -1 when the page is
+ * considered uncorrectable.
+ */
+static inline int vf610_nfc_correct_data(struct mtd_info *mtd, u_char *dat)
+{
+	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+	u8 ecc_status;
+	u8 ecc_count;
+	int flip;
+
+	/* Status byte lives in the SRAM buffer at the programmed ECC address. */
+	ecc_status = __raw_readb(nfc->regs + ECC_SRAM_ADDR * 8 + ECC_OFFSET);
+	ecc_count = ecc_status & ECC_ERR_COUNT;
+	if (!(ecc_status & ECC_STATUS_MASK))
+		return ecc_count;
+
+	/*
+	 * Failure flagged. Count the 0 bits in the data: no more than
+	 * 'ecc_count' of them means the page is taken to be erased.
+	 */
+	flip = count_written_bits(dat, nfc->chip.ecc.size, ecc_count);
+
+	/* ECC failed. */
+	if (flip > ecc_count)
+		return -1;
+
+	/* Erased page. */
+	memset(dat, 0xff, nfc->chip.ecc.size);
+	return 0;
+}
+
+
+/*
+ * ecc.read_page hook: copy one ECC-sized chunk of page data (and the OOB
+ * when requested) out of the NFC buffer, then fold the hardware ECC result
+ * into mtd->ecc_stats.  Always returns 0; failures are only visible through
+ * the statistics.
+ */
+static int vf610_nfc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
+{
+	int fixed;
+
+	vf610_nfc_read_buf(mtd, buf, chip->ecc.size);
+
+	if (oob_required)
+		vf610_nfc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	fixed = vf610_nfc_correct_data(mtd, buf);
+	if (fixed >= 0)
+		mtd->ecc_stats.corrected += fixed;
+	else
+		mtd->ecc_stats.failed++;
+
+	return 0;
+}
+
+/*
+ * ecc.write_page hook: stage the page data, plus the OOB when requested,
+ * in the NFC buffer.  No ECC bytes are computed here; ECC will be
+ * calculated automatically.  Always returns 0.
+ */
+static int vf610_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+			       const uint8_t *buf, int oob_required)
+{
+	vf610_nfc_write_buf(mtd, buf, mtd->writesize);
+
+	if (!oob_required)
+		return 0;
+
+	vf610_nfc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	return 0;
+}
+
+/* Build-time settings consumed by vf610_nfc_nand_init(). */
+struct vf610_nfc_config {
+	int hardware_ecc;	/* non-zero: use the 45-byte hardware ECC mode */
+	int width;		/* bus width in bits; 16 selects 16-bit mode */
+	int flash_bbt;		/* non-zero: keep the bad block table in flash */
+};
+
+/*
+ * Set up the NFC mapped at 'addr' and register the NAND device found
+ * behind it as nand_info[devnum].
+ *
+ * The driver state is allocated zeroed because several fields are
+ * read-modified (chip->options) or compared (nfc->page) before anything
+ * else writes them.  On any failure the allocation is released and
+ * mtd->priv is reset so no dangling pointer is left behind.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int vf610_nfc_nand_init(int devnum, u8 *addr)
+{
+	struct mtd_info *mtd = &nand_info[devnum];
+	struct nand_chip *chip;
+	struct vf610_nfc *nfc;
+	int err = 0;
+	int page_sz;
+	struct vf610_nfc_config cfg = {
+		.hardware_ecc = 1,
+#ifdef CONFIG_SYS_NAND_BUSWIDTH_16BIT
+		.width = 16,
+#else
+		.width = 8,
+#endif
+		.flash_bbt = 1,
+	};
+
+	nfc = calloc(1, sizeof(*nfc));
+	if (!nfc) {
+		printf(KERN_ERR DRV_NAME ": Memory exhausted!\n");
+		return -ENOMEM;
+	}
+
+	/* Nothing is cached in the NFC buffer yet; page 0 must not match. */
+	nfc->page = -1;
+
+	chip = &nfc->chip;
+	chip->IO_ADDR_R = chip->IO_ADDR_W = nfc->regs = (void __iomem *)addr;
+
+	mtd->priv = chip;
+	chip->priv = nfc;
+
+	if (cfg.width == 16) {
+		chip->options |= NAND_BUSWIDTH_16;
+		vf610_nfc_set(mtd, NFC_FLASH_CONFIG, CONFIG_16BIT);
+	} else {
+		chip->options &= ~NAND_BUSWIDTH_16;
+		vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_16BIT);
+	}
+
+	chip->dev_ready = vf610_nfc_dev_ready;
+	chip->cmdfunc = vf610_nfc_command;
+	chip->read_byte = vf610_nfc_read_byte;
+	chip->read_word = vf610_nfc_read_word;
+	chip->read_buf = vf610_nfc_read_buf;
+	chip->write_buf = vf610_nfc_write_buf;
+	chip->select_chip = vf610_nfc_select_chip;
+
+	/* Bad block options. */
+	if (cfg.flash_bbt)
+		chip->bbt_options = NAND_BBT_USE_FLASH | NAND_BBT_CREATE;
+
+	/* Default to software ECC until flash ID. */
+	vf610_nfc_set_field(mtd, NFC_FLASH_CONFIG,
+		      CONFIG_ECC_MODE_MASK,
+		      CONFIG_ECC_MODE_SHIFT, ECC_BYPASS);
+
+	chip->bbt_td = &bbt_main_descr;
+	chip->bbt_md = &bbt_mirror_descr;
+
+	/* Provisional sector size; rewritten once the real geometry is known. */
+	page_sz = PAGE_2K + OOB_64;
+	page_sz += cfg.width == 16 ? 1 : 0;
+	vf610_nfc_write(mtd, NFC_SECTOR_SIZE, page_sz);
+
+	/* Set configuration register. */
+	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_ADDR_AUTO_INCR_BIT);
+	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_BUFNO_AUTO_INCR_BIT);
+	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_BOOT_MODE_BIT);
+	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_DMA_REQ_BIT);
+	vf610_nfc_set(mtd, NFC_FLASH_CONFIG, CONFIG_FAST_FLASH_BIT);
+
+	/* Enable Idle IRQ */
+	vf610_nfc_set(mtd, NFC_IRQ_STATUS, IDLE_EN_BIT);
+
+	/* PAGE_CNT = 1 */
+	vf610_nfc_set_field(mtd, NFC_FLASH_CONFIG, CONFIG_PAGE_CNT_MASK,
+			CONFIG_PAGE_CNT_SHIFT, 1);
+
+	/* Set ECC_STATUS offset */
+	vf610_nfc_set_field(mtd, NFC_FLASH_CONFIG,
+		      CONFIG_ECC_SRAM_ADDR_MASK,
+		      CONFIG_ECC_SRAM_ADDR_SHIFT, ECC_SRAM_ADDR);
+
+	/* first scan to find the device and get the page size */
+	if (nand_scan_ident(mtd, CONFIG_SYS_MAX_NAND_DEVICE, NULL)) {
+		err = -ENXIO;
+		goto error;
+	}
+
+	chip->ecc.mode = NAND_ECC_SOFT; /* default */
+
+	page_sz = mtd->writesize + mtd->oobsize;
+
+	/* Single buffer only, max 256 OOB minus ECC status */
+	if (page_sz > PAGE_2K + 256 - 8) {
+		dev_err(nfc->dev, "Unsupported flash size\n");
+		err = -ENXIO;
+		goto error;
+	}
+	page_sz += cfg.width == 16 ? 1 : 0;
+	vf610_nfc_write(mtd, NFC_SECTOR_SIZE, page_sz);
+
+	if (cfg.hardware_ecc) {
+		/*
+		 * The ECC layout needs OOB bytes up to offset 63 and the ECC
+		 * step is fixed at 2K, so either mismatch rules the device
+		 * out.
+		 */
+		if (mtd->writesize != PAGE_2K || mtd->oobsize < 64) {
+			dev_err(nfc->dev, "Unsupported flash with hwecc\n");
+			err = -ENXIO;
+			goto error;
+		}
+
+		chip->ecc.layout = &vf610_nfc_ecc45;
+
+		/* propagate ecc.layout to mtd_info */
+		mtd->ecclayout = chip->ecc.layout;
+		chip->ecc.read_page = vf610_nfc_read_page;
+		chip->ecc.write_page = vf610_nfc_write_page;
+		chip->ecc.mode = NAND_ECC_HW;
+
+		chip->ecc.bytes = 45;
+		chip->ecc.size = PAGE_2K;
+		chip->ecc.strength = 24;
+
+		/* set ECC mode to 45 bytes OOB with 24 bits correction */
+		vf610_nfc_set_field(mtd, NFC_FLASH_CONFIG,
+				CONFIG_ECC_MODE_MASK,
+				CONFIG_ECC_MODE_SHIFT, ECC_45_BYTE);
+
+		/* Enable ECC_STATUS */
+		vf610_nfc_set(mtd, NFC_FLASH_CONFIG, CONFIG_ECC_SRAM_REQ_BIT);
+
+	}
+
+	/* second phase scan */
+	err = nand_scan_tail(mtd);
+	if (err)
+		goto error;
+
+	err = nand_register(devnum);
+	if (err)
+		goto error;
+
+	return 0;
+
+error:
+	/* The chip is embedded in nfc, so drop the reference before freeing. */
+	mtd->priv = NULL;
+	free(nfc);
+	return err;
+}
+
+/*
+ * Board-level NAND entry point: initialise NAND device 0 with the NFC at
+ * CONFIG_SYS_NAND_BASE and report a failure on the console.
+ */
+void board_nand_init(void)
+{
+	int ret;
+
+	ret = vf610_nfc_nand_init(0, (u8 *)CONFIG_SYS_NAND_BASE);
+	if (ret != 0)
+		printf("VF610 NAND init failed (err %d)\n", ret);
+}