diff mbox series

[v5,1/5] spi: spi-mem: Add driver for NXP FlexSPI controller

Message ID 1542366701-16065-2-git-send-email-yogeshnarayan.gaur@nxp.com
State Superseded
Delegated to: Boris Brezillon
Headers show
Series spi: spi-mem: Add driver for NXP FlexSPI controller | expand

Commit Message

Yogesh Narayan Gaur Nov. 16, 2018, 11:13 a.m. UTC
- Add driver for NXP FlexSPI host controller

(0) What is the FlexSPI controller?
 FlexSPI is a flexsible SPI host controller which supports two SPI
 channels and up to 4 external devices. Each channel supports
 Single/Dual/Quad/Octal mode data transfer (1/2/4/8 bidirectional
 data lines) i.e. FlexSPI acts as an interface to external devices,
 maximum 4, each with up to 8 bidirectional data lines.

 It uses new SPI memory interface of the SPI framework to issue
 flash memory operations to up to four connected flash
 devices (2 buses with 2 CS each).

(1) Tested this driver with the mtd_debug and JFFS2 filesystem utility
 on NXP LX2160ARDB and LX2160AQDS targets.
 LX2160ARDB is having two NOR slave device connected on single bus A
 i.e. A0 and A1 (CS0 and CS1).
 LX2160AQDS is having two NOR slave device connected on separate buses
 one flash on A0 and second on B1 i.e. (CS0 and CS3).
 Verified this driver on following SPI NOR flashes:
    Micron, mt35xu512ab, [Read - 1 bit mode]
    Cypress, s25fl512s, [Read - 1/2/4 bit mode]

Signed-off-by: Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
---
Changes for v5:
- Rebase on top of v4.20-rc2
- Modified fspi_readl_poll_tout() as per review comments
- Arrange header file in alphabetical order
- Removed usage of read()/write() function callback pointer
- Add support for 1 and 2 byte address length
- Change Frieder e-mail to new e-mail address
Changes for v4:
- Incorporate Boris review comments
  * Use readl_poll_timeout() instead of busy looping.
  * Re-define register masking as per comment.
  * Drop fspi_devtype enum.
Changes for v3:
- Added endianness flag in platform specific structure instead of DTS.
- Modified nxp_fspi_read_ahb(), removed remapping code.
- Added Boris and Frieder as Author and provided reference of spi-fsl-qspi.c
Changes for v2:
- Incorporated Boris review comments.
- Remove dependency of driver over connected flash device size.
- Modified the logic to select requested CS.
- Remove SPI-Octal Macros.

 drivers/spi/Kconfig        |   10 +
 drivers/spi/Makefile       |    1 +
 drivers/spi/spi-nxp-fspi.c | 1145 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1156 insertions(+)
 create mode 100644 drivers/spi/spi-nxp-fspi.c

Comments

Frieder Schrempf Dec. 6, 2018, 9:22 a.m. UTC | #1
Hi Yogesh,

I've had a closer look at your v5. See my comments below.

On 16.11.18 12:13, Yogesh Narayan Gaur wrote:
> - Add driver for NXP FlexSPI host controller
> 
> (0) What is the FlexSPI controller?
>   FlexSPI is a flexsible SPI host controller which supports two SPI
>   channels and up to 4 external devices. Each channel supports
>   Single/Dual/Quad/Octal mode data transfer (1/2/4/8 bidirectional
>   data lines) i.e. FlexSPI acts as an interface to external devices,
>   maximum 4, each with up to 8 bidirectional data lines.
> 
>   It uses new SPI memory interface of the SPI framework to issue
>   flash memory operations to up to four connected flash
>   devices (2 buses with 2 CS each).
> 
> (1) Tested this driver with the mtd_debug and JFFS2 filesystem utility
>   on NXP LX2160ARDB and LX2160AQDS targets.
>   LX2160ARDB is having two NOR slave device connected on single bus A
>   i.e. A0 and A1 (CS0 and CS1).
>   LX2160AQDS is having two NOR slave device connected on separate buses
>   one flash on A0 and second on B1 i.e. (CS0 and CS3).
>   Verified this driver on following SPI NOR flashes:
>      Micron, mt35xu512ab, [Read - 1 bit mode]
>      Cypress, s25fl512s, [Read - 1/2/4 bit mode]
> 
> Signed-off-by: Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
> ---
> Changes for v5:
> - Rebase on top of v4.20-rc2
> - Modified fspi_readl_poll_tout() as per review comments
> - Arrange header file in alphabetical order
> - Removed usage of read()/write() function callback pointer
> - Add support for 1 and 2 byte address length
> - Change Frieder e-mail to new e-mail address
> Changes for v4:
> - Incorporate Boris review comments
>    * Use readl_poll_timeout() instead of busy looping.
>    * Re-define register masking as per comment.
>    * Drop fspi_devtype enum.
> Changes for v3:
> - Added endianness flag in platform specific structure instead of DTS.
> - Modified nxp_fspi_read_ahb(), removed remapping code.
> - Added Boris and Frieder as Author and provided reference of spi-fsl-qspi.c
> Changes for v2:
> - Incorporated Boris review comments.
> - Remove dependency of driver over connected flash device size.
> - Modified the logic to select requested CS.
> - Remove SPI-Octal Macros.
> 
>   drivers/spi/Kconfig        |   10 +
>   drivers/spi/Makefile       |    1 +
>   drivers/spi/spi-nxp-fspi.c | 1145 ++++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 1156 insertions(+)
>   create mode 100644 drivers/spi/spi-nxp-fspi.c
> 
> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> index 7d3a5c9..36630a1 100644
> --- a/drivers/spi/Kconfig
> +++ b/drivers/spi/Kconfig
> @@ -259,6 +259,16 @@ config SPI_FSL_LPSPI
>   	help
>   	  This enables Freescale i.MX LPSPI controllers in master mode.
>   
> +config SPI_NXP_FLEXSPI
> +	tristate "NXP Flex SPI controller"
> +	depends on ARCH_LAYERSCAPE || HAS_IOMEM
> +	help
> +	  This enables support for the Flex SPI controller in master mode.
> +	  Up to four slave devices can be connected on two buses with two
> +	  chipselects each.
> +	  This controller does not support generic SPI messages and only
> +	  supports the high-level SPI memory interface.
> +
>   config SPI_GPIO
>   	tristate "GPIO-based bitbanging SPI Master"
>   	depends on GPIOLIB || COMPILE_TEST
> diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
> index 3575205..55fec5c 100644
> --- a/drivers/spi/Makefile
> +++ b/drivers/spi/Makefile
> @@ -60,6 +60,7 @@ obj-$(CONFIG_SPI_MPC52xx)		+= spi-mpc52xx.o
>   obj-$(CONFIG_SPI_MT65XX)                += spi-mt65xx.o
>   obj-$(CONFIG_SPI_MXS)			+= spi-mxs.o
>   obj-$(CONFIG_SPI_NUC900)		+= spi-nuc900.o
> +obj-$(CONFIG_SPI_NXP_FLEXSPI)		+= spi-nxp-fspi.o
>   obj-$(CONFIG_SPI_OC_TINY)		+= spi-oc-tiny.o
>   spi-octeon-objs				:= spi-cavium.o spi-cavium-octeon.o
>   obj-$(CONFIG_SPI_OCTEON)		+= spi-octeon.o
> diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
> new file mode 100644
> index 0000000..a35013b
> --- /dev/null
> +++ b/drivers/spi/spi-nxp-fspi.c
> @@ -0,0 +1,1145 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +
> +/*
> + * NXP FlexSPI(FSPI) controller driver.
> + *
> + * Copyright 2018 NXP.
> + *
> + * FlexSPI is a flexsible SPI host controller which supports two SPI
> + * channels and up to 4 external devices. Each channel supports
> + * Single/Dual/Quad/Octal mode data transfer (1/2/4/8 bidirectional
> + * data lines).
> + *
> + * FlexSPI controller is driven by the LUT(Look-up Table) registers
> + * LUT registers are a look-up-table for sequences of instructions.
> + * A valid sequence consists of four LUT registers.
> + * Maximum 32 LUT sequences can be programmed simultaneously.
> + *
> + * LUTs are being created at run-time based on the commands passed
> + * from the spi-mem framework, thus using single LUT index.
> + *
> + * Software triggered Flash read/write access by IP Bus.
> + *
> + * Memory mapped read access by AHB Bus.
> + *
> + * Based on SPI MEM interface and spi-fsl-qspi.c driver.
> + *
> + * Author:
> + *     Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
> + *     Boris Brezillion <boris.brezillon@bootlin.com>
> + *     Frieder Schrempf <frieder.schrempf@kontron.de>
> + */
> +
> +#include <linux/bitops.h>
> +#include <linux/clk.h>
> +#include <linux/completion.h>
> +#include <linux/delay.h>
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/iopoll.h>
> +#include <linux/jiffies.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/of.h>
> +#include <linux/of_device.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_qos.h>
> +#include <linux/sizes.h>
> +
> +#include <linux/spi/spi.h>
> +#include <linux/spi/spi-mem.h>
> +
> +/*
> + * The driver only uses one single LUT entry, that is updated on
> + * each call of exec_op(). Index 0 is preset at boot with a basic
> + * read operation, so let's use the last entry (31).
> + */
> +#define	SEQID_LUT			31
> +
> +/* Registers used by the driver */
> +#define FSPI_MCR0			0x00
> +#define FSPI_MCR0_AHB_TIMEOUT(x)	((x) << 24)
> +#define FSPI_MCR0_IP_TIMEOUT(x)		((x) << 16)
> +#define FSPI_MCR0_LEARN_EN		BIT(15)
> +#define FSPI_MCR0_SCRFRUN_EN		BIT(14)
> +#define FSPI_MCR0_OCTCOMB_EN		BIT(13)
> +#define FSPI_MCR0_DOZE_EN		BIT(12)
> +#define FSPI_MCR0_HSEN			BIT(11)
> +#define FSPI_MCR0_SERCLKDIV		BIT(8)
> +#define FSPI_MCR0_ATDF_EN		BIT(7)
> +#define FSPI_MCR0_ARDF_EN		BIT(6)
> +#define FSPI_MCR0_RXCLKSRC(x)		((x) << 4)
> +#define FSPI_MCR0_END_CFG(x)		((x) << 2)
> +#define FSPI_MCR0_MDIS			BIT(1)
> +#define FSPI_MCR0_SWRST			BIT(0)
> +
> +#define FSPI_MCR1			0x04
> +#define FSPI_MCR1_SEQ_TIMEOUT(x)	((x) << 16)
> +#define FSPI_MCR1_AHB_TIMEOUT(x)	(x)
> +
> +#define FSPI_MCR2			0x08
> +#define FSPI_MCR2_IDLE_WAIT(x)		((x) << 24)
> +#define FSPI_MCR2_SAMEDEVICEEN		BIT(15)
> +#define FSPI_MCR2_CLRLRPHS		BIT(14)
> +#define FSPI_MCR2_ABRDATSZ		BIT(8)
> +#define FSPI_MCR2_ABRLEARN		BIT(7)
> +#define FSPI_MCR2_ABR_READ		BIT(6)
> +#define FSPI_MCR2_ABRWRITE		BIT(5)
> +#define FSPI_MCR2_ABRDUMMY		BIT(4)
> +#define FSPI_MCR2_ABR_MODE		BIT(3)
> +#define FSPI_MCR2_ABRCADDR		BIT(2)
> +#define FSPI_MCR2_ABRRADDR		BIT(1)
> +#define FSPI_MCR2_ABR_CMD		BIT(0)
> +
> +#define FSPI_AHBCR			0x0c
> +#define FSPI_AHBCR_RDADDROPT		BIT(6)
> +#define FSPI_AHBCR_PREF_EN		BIT(5)
> +#define FSPI_AHBCR_BUFF_EN		BIT(4)
> +#define FSPI_AHBCR_CACH_EN		BIT(3)
> +#define FSPI_AHBCR_CLRTXBUF		BIT(2)
> +#define FSPI_AHBCR_CLRRXBUF		BIT(1)
> +#define FSPI_AHBCR_PAR_EN		BIT(0)
> +
> +#define FSPI_INTEN			0x10
> +#define FSPI_INTEN_SCLKSBWR		BIT(9)
> +#define FSPI_INTEN_SCLKSBRD		BIT(8)
> +#define FSPI_INTEN_DATALRNFL		BIT(7)
> +#define FSPI_INTEN_IPTXWE		BIT(6)
> +#define FSPI_INTEN_IPRXWA		BIT(5)
> +#define FSPI_INTEN_AHBCMDERR		BIT(4)
> +#define FSPI_INTEN_IPCMDERR		BIT(3)
> +#define FSPI_INTEN_AHBCMDGE		BIT(2)
> +#define FSPI_INTEN_IPCMDGE		BIT(1)
> +#define FSPI_INTEN_IPCMDDONE		BIT(0)
> +
> +#define FSPI_INTR			0x14
> +#define FSPI_INTR_SCLKSBWR		BIT(9)
> +#define FSPI_INTR_SCLKSBRD		BIT(8)
> +#define FSPI_INTR_DATALRNFL		BIT(7)
> +#define FSPI_INTR_IPTXWE		BIT(6)
> +#define FSPI_INTR_IPRXWA		BIT(5)
> +#define FSPI_INTR_AHBCMDERR		BIT(4)
> +#define FSPI_INTR_IPCMDERR		BIT(3)
> +#define FSPI_INTR_AHBCMDGE		BIT(2)
> +#define FSPI_INTR_IPCMDGE		BIT(1)
> +#define FSPI_INTR_IPCMDDONE		BIT(0)
> +
> +#define FSPI_LUTKEY			0x18
> +#define FSPI_LUTKEY_VALUE		0x5AF05AF0
> +
> +#define FSPI_LCKCR			0x1C
> +
> +#define FSPI_LCKER_LOCK			0x1
> +#define FSPI_LCKER_UNLOCK		0x2
> +
> +#define FSPI_BUFXCR_INVALID_MSTRID	0xE
> +#define FSPI_AHBRX_BUF0CR0		0x20
> +#define FSPI_AHBRX_BUF1CR0		0x24
> +#define FSPI_AHBRX_BUF2CR0		0x28
> +#define FSPI_AHBRX_BUF3CR0		0x2C
> +#define FSPI_AHBRX_BUF4CR0		0x30
> +#define FSPI_AHBRX_BUF5CR0		0x34
> +#define FSPI_AHBRX_BUF6CR0		0x38
> +#define FSPI_AHBRX_BUF7CR0		0x3C
> +#define FSPI_AHBRXBUF0CR7_PREF		BIT(31)
> +
> +#define FSPI_AHBRX_BUF0CR1		0x40
> +#define FSPI_AHBRX_BUF1CR1		0x44
> +#define FSPI_AHBRX_BUF2CR1		0x48
> +#define FSPI_AHBRX_BUF3CR1		0x4C
> +#define FSPI_AHBRX_BUF4CR1		0x50
> +#define FSPI_AHBRX_BUF5CR1		0x54
> +#define FSPI_AHBRX_BUF6CR1		0x58
> +#define FSPI_AHBRX_BUF7CR1		0x5C
> +
> +#define FSPI_FLSHA1CR0			0x60
> +#define FSPI_FLSHA2CR0			0x64
> +#define FSPI_FLSHB1CR0			0x68
> +#define FSPI_FLSHB2CR0			0x6C
> +#define FSPI_FLSHXCR0_SZ_KB		10
> +#define FSPI_FLSHXCR0_SZ(x)		((x) >> FSPI_FLSHXCR0_SZ_KB)
> +
> +#define FSPI_FLSHA1CR1			0x70
> +#define FSPI_FLSHA2CR1			0x74
> +#define FSPI_FLSHB1CR1			0x78
> +#define FSPI_FLSHB2CR1			0x7C
> +#define FSPI_FLSHXCR1_CSINTR(x)		((x) << 16)
> +#define FSPI_FLSHXCR1_CAS(x)		((x) << 11)
> +#define FSPI_FLSHXCR1_WA		BIT(10)
> +#define FSPI_FLSHXCR1_TCSH(x)		((x) << 5)
> +#define FSPI_FLSHXCR1_TCSS(x)		(x)
> +
> +#define FSPI_FLSHA1CR2			0x80
> +#define FSPI_FLSHA2CR2			0x84
> +#define FSPI_FLSHB1CR2			0x88
> +#define FSPI_FLSHB2CR2			0x8C
> +#define FSPI_FLSHXCR2_CLRINSP		BIT(24)
> +#define FSPI_FLSHXCR2_AWRWAIT		BIT(16)
> +#define FSPI_FLSHXCR2_AWRSEQN_SHIFT	13
> +#define FSPI_FLSHXCR2_AWRSEQI_SHIFT	8
> +#define FSPI_FLSHXCR2_ARDSEQN_SHIFT	5
> +#define FSPI_FLSHXCR2_ARDSEQI_SHIFT	0
> +
> +#define FSPI_IPCR0			0xA0
> +
> +#define FSPI_IPCR1			0xA4
> +#define FSPI_IPCR1_IPAREN		BIT(31)
> +#define FSPI_IPCR1_SEQNUM_SHIFT		24
> +#define FSPI_IPCR1_SEQID_SHIFT		16
> +#define FSPI_IPCR1_IDATSZ(x)		(x)
> +
> +#define FSPI_IPCMD			0xB0
> +#define FSPI_IPCMD_TRG			BIT(0)
> +
> +#define FSPI_DLPR			0xB4
> +
> +#define FSPI_IPRXFCR			0xB8
> +#define FSPI_IPRXFCR_CLR		BIT(0)
> +#define FSPI_IPRXFCR_DMA_EN		BIT(1)
> +#define FSPI_IPRXFCR_WMRK(x)		((x) << 2)
> +
> +#define FSPI_IPTXFCR			0xBC
> +#define FSPI_IPTXFCR_CLR		BIT(0)
> +#define FSPI_IPTXFCR_DMA_EN		BIT(1)
> +#define FSPI_IPTXFCR_WMRK(x)		((x) << 2)
> +
> +#define FSPI_DLLACR			0xC0
> +#define FSPI_DLLACR_OVRDEN		BIT(8)
> +
> +#define FSPI_DLLBCR			0xC4
> +#define FSPI_DLLBCR_OVRDEN		BIT(8)
> +
> +#define FSPI_STS0			0xE0
> +#define FSPI_STS0_DLPHB(x)		((x) << 8)
> +#define FSPI_STS0_DLPHA(x)		((x) << 4)
> +#define FSPI_STS0_CMD_SRC(x)		((x) << 2)
> +#define FSPI_STS0_ARB_IDLE		BIT(1)
> +#define FSPI_STS0_SEQ_IDLE		BIT(0)
> +
> +#define FSPI_STS1			0xE4
> +#define FSPI_STS1_IP_ERRCD(x)		((x) << 24)
> +#define FSPI_STS1_IP_ERRID(x)		((x) << 16)
> +#define FSPI_STS1_AHB_ERRCD(x)		((x) << 8)
> +#define FSPI_STS1_AHB_ERRID(x)		(x)
> +
> +#define FSPI_AHBSPNST			0xEC
> +#define FSPI_AHBSPNST_DATLFT(x)		((x) << 16)
> +#define FSPI_AHBSPNST_BUFID(x)		((x) << 1)
> +#define FSPI_AHBSPNST_ACTIVE		BIT(0)
> +
> +#define FSPI_IPRXFSTS			0xF0
> +#define FSPI_IPRXFSTS_RDCNTR(x)		((x) << 16)
> +#define FSPI_IPRXFSTS_FILL(x)		(x)
> +
> +#define FSPI_IPTXFSTS			0xF4
> +#define FSPI_IPTXFSTS_WRCNTR(x)		((x) << 16)
> +#define FSPI_IPTXFSTS_FILL(x)		(x)
> +
> +#define FSPI_RFDR			0x100
> +#define FSPI_TFDR			0x180
> +
> +#define FSPI_LUT_BASE			0x200
> +#define FSPI_LUT_OFFSET			(SEQID_LUT * 4 * 4)
> +#define FSPI_LUT_REG(idx) \
> +	(FSPI_LUT_BASE + FSPI_LUT_OFFSET + (idx) * 4)
> +
> +/* register map end */
> +
> +/* Instruction set for the LUT register. */
> +#define LUT_STOP			0x00
> +#define LUT_CMD				0x01
> +#define LUT_ADDR			0x02
> +#define LUT_CADDR_SDR			0x03
> +#define LUT_MODE			0x04
> +#define LUT_MODE2			0x05
> +#define LUT_MODE4			0x06
> +#define LUT_MODE8			0x07
> +#define LUT_NXP_WRITE			0x08
> +#define LUT_NXP_READ			0x09
> +#define LUT_LEARN_SDR			0x0A
> +#define LUT_DATSZ_SDR			0x0B
> +#define LUT_DUMMY			0x0C
> +#define LUT_DUMMY_RWDS_SDR		0x0D
> +#define LUT_JMP_ON_CS			0x1F
> +#define LUT_CMD_DDR			0x21
> +#define LUT_ADDR_DDR			0x22
> +#define LUT_CADDR_DDR			0x23
> +#define LUT_MODE_DDR			0x24
> +#define LUT_MODE2_DDR			0x25
> +#define LUT_MODE4_DDR			0x26
> +#define LUT_MODE8_DDR			0x27
> +#define LUT_WRITE_DDR			0x28
> +#define LUT_READ_DDR			0x29
> +#define LUT_LEARN_DDR			0x2A
> +#define LUT_DATSZ_DDR			0x2B
> +#define LUT_DUMMY_DDR			0x2C
> +#define LUT_DUMMY_RWDS_DDR		0x2D
> +
> +/*
> + * Calculate number of required PAD bits for LUT register.
> + *
> + * The pad stands for the number of IO lines [0:7].
> + * For example, the octal read needs eight IO lines,
> + * so you should use LUT_PAD(8). This macro
> + * returns 3 i.e. use eight (2^3) IP lines for read.
> + */
> +#define LUT_PAD(x) (fls(x) - 1)
> +
> +/*
> + * Macro for constructing the LUT entries with the following
> + * register layout:
> + *
> + *  ---------------------------------------------------
> + *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
> + *  ---------------------------------------------------
> + */
> +#define PAD_SHIFT		8
> +#define INSTR_SHIFT		10
> +#define OPRND_SHIFT		16
> +
> +/* Macros for constructing the LUT register. */
> +#define LUT_DEF(idx, ins, pad, opr)			  \
> +	((((ins) << INSTR_SHIFT) | ((pad) << PAD_SHIFT) | \
> +	(opr)) << (((idx) % 2) * OPRND_SHIFT))
> +
> +/* Operands for the LUT register. */
> +#define ADDR8BIT		0x08
> +#define ADDR16BIT		0x10
> +#define ADDR24BIT		0x18
> +#define ADDR32BIT		0x20

You can drop these ADDRXBIT definitions, see below...

> +
> +#define POLL_TOUT		5000
> +#define NXP_FSPI_MAX_CHIPSELECT		4
> +
> +struct nxp_fspi_devtype_data {
> +	unsigned int rxfifo;
> +	unsigned int txfifo;
> +	unsigned int ahb_buf_size;
> +	unsigned int quirks;
> +	bool little_endian;
> +};
> +
> +static const struct nxp_fspi_devtype_data lx2160a_data = {
> +	.rxfifo = SZ_512,       /* (64  * 64 bits)  */
> +	.txfifo = SZ_1K,        /* (128 * 64 bits)  */
> +	.ahb_buf_size = SZ_2K,  /* (256 * 64 bits)  */
> +	.quirks = 0,
> +	.little_endian = true,  /* little-endian    */
> +};
> +
> +struct nxp_fspi {
> +	void __iomem *iobase;
> +	void __iomem *ahb_addr;
> +	u32 memmap_phy;
> +	u32 memmap_phy_size;
> +	struct clk *clk, *clk_en;
> +	struct device *dev;
> +	struct completion c;
> +	const struct nxp_fspi_devtype_data *devtype_data;
> +	struct mutex lock;
> +	struct pm_qos_request pm_qos_req;
> +	int selected;
> +};
> +
> +/*
> + * R/W functions for big- or little-endian registers:
> + * The FSPI controller's endianness is independent of
> + * the CPU core's endianness. So far, although the CPU
> + * core is little-endian the FSPI controller can use
> + * big-endian or little-endian.
> + */
> +static void fspi_writel(struct nxp_fspi *f, u32 val, void __iomem *addr)
> +{
> +	if (f->devtype_data->little_endian)
> +		iowrite32(val, addr);
> +	else
> +		iowrite32be(val, addr);
> +}
> +
> +static u32 fspi_readl(struct nxp_fspi *f, void __iomem *addr)
> +{
> +	if (f->devtype_data->little_endian)
> +		return ioread32(addr);
> +	else
> +		return ioread32be(addr);
> +}
> +
> +static irqreturn_t nxp_fspi_irq_handler(int irq, void *dev_id)
> +{
> +	struct nxp_fspi *f = dev_id;
> +	u32 reg;
> +
> +	/* clear interrupt */
> +	reg = fspi_readl(f, f->iobase + FSPI_INTR);
> +	fspi_writel(f, FSPI_INTR_IPCMDDONE, f->iobase + FSPI_INTR);
> +
> +	if (reg & FSPI_INTR_IPCMDDONE)
> +		complete(&f->c);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int nxp_fspi_check_buswidth(struct nxp_fspi *f, u8 width)
> +{
> +	switch (width) {
> +	case 1:
> +	case 2:
> +	case 4:
> +	case 8:
> +		return 0;
> +	}
> +
> +	return -ENOTSUPP;
> +}
> +
> +static bool nxp_fspi_supports_op(struct spi_mem *mem,
> +				 const struct spi_mem_op *op)
> +{
> +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
> +	int ret;
> +
> +	ret = nxp_fspi_check_buswidth(f, op->cmd.buswidth);
> +
> +	if (op->addr.nbytes)
> +		ret |= nxp_fspi_check_buswidth(f, op->addr.buswidth);
> +
> +	if (op->dummy.nbytes)
> +		ret |= nxp_fspi_check_buswidth(f, op->dummy.buswidth);
> +
> +	if (op->data.nbytes)
> +		ret |= nxp_fspi_check_buswidth(f, op->data.buswidth);
> +
> +	if (ret)
> +		return false;
> +
> +	/*
> +	 * The number of instructions needed for the op, needs
> +	 * to fit into a single LUT entry.
> +	 */
> +	if (op->addr.nbytes +
> +	   (op->dummy.nbytes ? 1:0) +
> +	   (op->data.nbytes ? 1:0) > 6)
> +		return false;
> +
> +	/* Max 64 dummy clock cycles supported */
> +	if (op->dummy.buswidth &&
> +	    (op->dummy.nbytes * 8 / op->dummy.buswidth > 64))
> +		return false;
> +
> +	/* Max data length, check controller limits and alignment */
> +	if (op->data.dir == SPI_MEM_DATA_IN &&
> +	    (op->data.nbytes > f->devtype_data->ahb_buf_size ||
> +	     (op->data.nbytes > f->devtype_data->rxfifo - 4 &&
> +	      !IS_ALIGNED(op->data.nbytes, 8))))
> +		return false;
> +
> +	if (op->data.dir == SPI_MEM_DATA_OUT &&
> +	    op->data.nbytes > f->devtype_data->txfifo)
> +		return false;
> +
> +	return true;
> +}
> +
> +/* Instead of busy looping invoke readl_poll_timeout functionality. */
> +static int fspi_readl_poll_tout(struct nxp_fspi *f, void __iomem *base,
> +				u32 mask, u32 delay_us,
> +				u32 timeout_us, bool condition)
> +{
> +	u32 reg;
> +
> +	if (!f->devtype_data->little_endian)
> +		mask = (u32)cpu_to_be32(mask);
> +
> +	if (condition)
> +		return readl_poll_timeout(base, reg, (reg & mask),
> +					  delay_us, timeout_us);
> +	else
> +		return readl_poll_timeout(base, reg, !(reg & mask),
> +					  delay_us, timeout_us);

I would rather use a local variable to store the condition:

bool c = condition ? (reg & mask):!(reg & mask);

return readl_poll_timeout(base, reg, c, delay_us, timeout_us);

> +}
> +
> +/*
> + * If the slave device content being changed by Write/Erase, need to
> + * invalidate the AHB buffer. This can be achieved by doing the reset
> + * of controller after setting MCR0[SWRESET] bit.
> + */
> +static inline void nxp_fspi_invalid(struct nxp_fspi *f)
> +{
> +	u32 reg;
> +	int ret;
> +
> +	reg = fspi_readl(f, f->iobase + FSPI_MCR0);
> +	fspi_writel(f, reg | FSPI_MCR0_SWRST, f->iobase + FSPI_MCR0);
> +
> +	/* w1c register, wait unit clear */
> +	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
> +				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
> +	WARN_ON(ret);
> +}
> +
> +static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
> +				 const struct spi_mem_op *op)
> +{
> +	void __iomem *base = f->iobase;
> +	u32 lutval[4] = {};
> +	int lutidx = 1, i;
> +
> +	/* cmd */
> +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
> +			     op->cmd.opcode);
> +
> +	/* addr bus width */
> +	if (op->addr.nbytes) {
> +		u32 addrlen = 0;
> +
> +		switch (op->addr.nbytes) {
> +		case 1:
> +			addrlen = ADDR8BIT;
> +			break;
> +		case 2:
> +			addrlen = ADDR16BIT;
> +			break;
> +		case 3:
> +			addrlen = ADDR24BIT;
> +			break;
> +		case 4:
> +			addrlen = ADDR32BIT;
> +			break;
> +		default:
> +			dev_err(f->dev, "In-correct address length\n");
> +			return;
> +		}

You don't need to validate op->addr.nbytes here, this is already done in 
nxp_fspi_supports_op().

> +
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
> +					      LUT_PAD(op->addr.buswidth),
> +					      addrlen);

You can also just remove the whole switch statement above and use this:

lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
			      LUT_PAD(op->addr.buswidth),
			      op->addr.nbytes * 8);

> +		lutidx++;
> +	}
> +
> +	/* dummy bytes, if needed */
> +	if (op->dummy.nbytes) {
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
> +		/*
> +		 * Due to FlexSPI controller limitation number of PAD for dummy
> +		 * buswidth needs to be programmed as equal to data buswidth.
> +		 */
> +					      LUT_PAD(op->data.buswidth),
> +					      op->dummy.nbytes * 8 /
> +					      op->dummy.buswidth);
> +		lutidx++;
> +	}
> +
> +	/* read/write data bytes */
> +	if (op->data.nbytes) {
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx,
> +					      op->data.dir == SPI_MEM_DATA_IN ?
> +					      LUT_NXP_READ : LUT_NXP_WRITE,
> +					      LUT_PAD(op->data.buswidth),
> +					      0);
> +		lutidx++;
> +	}
> +
> +	/* stop condition. */
> +	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
> +
> +	/* unlock LUT */
> +	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
> +	fspi_writel(f, FSPI_LCKER_UNLOCK, f->iobase + FSPI_LCKCR);
> +
> +	/* fill LUT */
> +	for (i = 0; i < ARRAY_SIZE(lutval); i++)
> +		fspi_writel(f, lutval[i], base + FSPI_LUT_REG(i));
> +
> +	dev_dbg(f->dev, "CMD[%x] lutval[0:%x \t 1:%x \t 2:%x \t 3:%x]\n",
> +		op->cmd.opcode, lutval[0], lutval[1], lutval[2], lutval[3]);
> +
> +	/* lock LUT */
> +	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
> +	fspi_writel(f, FSPI_LCKER_LOCK, f->iobase + FSPI_LCKCR);
> +}
> +
> +static int nxp_fspi_clk_prep_enable(struct nxp_fspi *f)
> +{
> +	int ret;
> +
> +	ret = clk_prepare_enable(f->clk_en);
> +	if (ret)
> +		return ret;
> +
> +	ret = clk_prepare_enable(f->clk);
> +	if (ret) {
> +		clk_disable_unprepare(f->clk_en);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
> +{
> +	clk_disable_unprepare(f->clk);
> +	clk_disable_unprepare(f->clk_en);
> +}
> +
> +/*
> + * In FlexSPI controller, flash access is based on value of FSPI_FLSHXXCR0
> + * register and start base address of the slave device.
> + *
> + *							    (Higher address)
> + *				--------    <-- FLSHB2CR0
> + *				|  B2  |
> + *				|      |
> + *	B2 start address -->	--------    <-- FLSHB1CR0
> + *				|  B1  |
> + *				|      |
> + *	B1 start address -->	--------    <-- FLSHA2CR0
> + *				|  A2  |
> + *				|      |
> + *	A2 start address -->	--------    <-- FLSHA1CR0
> + *				|  A1  |
> + *				|      |
> + *	A1 start address -->	--------		    (Lower address)
> + *
> + *
> + * Start base address defines the starting address range for given CS and
> + * FSPI_FLSHXXCR0 defines the size of the slave device connected at given CS.
> + *
> + * But, different targets are having different combinations of number of CS,
> + * some targets only have single CS or two CS covering controller's full
> + * memory mapped space area.
> + * Thus, implementation is being done as independent of the size and number
> + * of the connected slave device.
> + * Assign controller memory mapped space size as the size to the connected
> + * slave device.
> + * Mark FLSHxxCR0 as zero initially and then assign value only to the selected
> + * chip-select Flash configuration register.
> + *
> + * For e.g. to access CS2 (B1), FLSHB1CR0 register would be equal to the
> + * memory mapped size of the controller.
> + * Value for rest of the CS FLSHxxCR0 register would be zero.
> + *
> + */
> +static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi)
> +{
> +	unsigned long rate = spi->max_speed_hz;
> +	int ret;
> +	uint64_t size_kb;
> +
> +	/*
> +	 * Return, if previously selected slave device is same as current
> +	 * requested slave device.
> +	 */
> +	if (f->selected == spi->chip_select)
> +		return;
> +
> +	/* Reset FLSHxxCR0 registers */
> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA1CR0);
> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA2CR0);
> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB1CR0);
> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB2CR0);
> +
> +	/* Assign controller memory mapped space as size, KBytes, of flash. */
> +	size_kb = FSPI_FLSHXCR0_SZ(f->memmap_phy_size);

You are still using memory of arbitrary size (memmap_phy_size) for 
mapping the flash. Why not use the same approach as in the QSPI driver 
and just map ahb_buf_size until we implement the dirmap API?
You are already aligning the AHB reads for this in 
nxp_fspi_adjust_op_size().

> +
> +	switch (spi->chip_select) {
> +	case 0:
> +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHA1CR0);
> +		break;
> +	case 1:
> +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHA2CR0);
> +		break;
> +	case 2:
> +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHB1CR0);
> +		break;
> +	case 3:
> +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHB2CR0);
> +		break;
> +	default:
> +		dev_err(f->dev, "In-correct CS provided\n");
> +		return;

You don't need to validate spi->chip_select here. This should never be 
invalid and if it is, something is really wrong and your check won't help.

> +	}
> +
> +	dev_dbg(f->dev, "Slave device [CS:%x] selected\n", spi->chip_select);
> +
> +	nxp_fspi_clk_disable_unprep(f);
> +
> +	ret = clk_set_rate(f->clk, rate);
> +	if (ret)
> +		return;
> +
> +	ret = nxp_fspi_clk_prep_enable(f);
> +	if (ret)
> +		return;

Missing newline line here.

> +	f->selected = spi->chip_select;
> +}
> +
> +static void nxp_fspi_read_ahb(struct nxp_fspi *f, const struct spi_mem_op *op)
> +{
> +	u32 len = op->data.nbytes;
> +
> +	/* Read out the data directly from the AHB buffer. */
> +	memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len);
> +}
> +
> +static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
> +				 const struct spi_mem_op *op)
> +{
> +	void __iomem *base = f->iobase;
> +	int i, j, ret;
> +	int size, tmp_size, wm_size;
> +	u32 data = 0;
> +	u32 *txbuf = (u32 *) op->data.buf.out;
> +
> +	/* clear the TX FIFO. */
> +	fspi_writel(f, FSPI_IPTXFCR_CLR, base + FSPI_IPTXFCR);
> +
> +	/* Default value of water mark level is 8 bytes. */
> +	wm_size = 8;
> +	size = op->data.nbytes / wm_size;
> +	for (i = 0; i < size; i++) {
> +		/* Wait for TXFIFO empty */
> +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> +					   FSPI_INTR_IPTXWE, 0,
> +					   POLL_TOUT, true);
> +		WARN_ON(ret);
> +
> +		j = 0;
> +		tmp_size = wm_size;
> +		while (tmp_size > 0) {
> +			data = 0;
> +			memcpy(&data, txbuf, 4);
> +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> +			tmp_size -= 4;
> +			j++;
> +			txbuf += 1;
> +		}
> +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> +	}
> +
> +	size = op->data.nbytes % wm_size;
> +	if (size) {
> +		/* Wait for TXFIFO empty */
> +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> +					   FSPI_INTR_IPTXWE, 0,
> +					   POLL_TOUT, true);
> +		WARN_ON(ret);
> +
> +		j = 0;
> +		tmp_size = 0;
> +		while (size > 0) {
> +			data = 0;
> +			tmp_size = (size < 4) ? size : 4;
> +			memcpy(&data, txbuf, tmp_size);
> +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> +			size -= tmp_size;
> +			j++;
> +			txbuf += 1;
> +		}
> +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> +	}

All these nested loops to fill the TX buffer and also the ones below to 
read the RX buffer look much more complicated than they should really 
be. Can you try to make this more readable?

Maybe something like this would work:

for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 8); i += 8) {
	/* Wait for TXFIFO empty */
	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
				   FSPI_INTR_IPTXWE, 0,
				   POLL_TOUT, true);

	fspi_writel(f, op->data.buf.out + i, base + FSPI_TFDR);
	fspi_writel(f, op->data.buf.out + i + 4, base + FSPI_TFDR + 4);
	fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
}

if (i < op->data.nbytes) {
	u32 data = 0;
	int j;
	/* Wait for TXFIFO empty */
	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
				   FSPI_INTR_IPTXWE, 0,
				   POLL_TOUT, true);

	for (j = 0; j < ALIGN(op->data.nbytes - i, 4); j += 4) {
		memcpy(&data, op->data.buf.out + i + j, 4);
		fspi_writel(f, data, base + FSPI_TFDR + j);
	}

	fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
}

> +}
> +
> +static void nxp_fspi_read_rxfifo(struct nxp_fspi *f,
> +			  const struct spi_mem_op *op)
> +{
> +	void __iomem *base = f->iobase;
> +	int i, j;
> +	int size, tmp_size, wm_size, ret;
> +	u32 tmp = 0;
> +	u8 *buf = op->data.buf.in;
> +	u32 len = op->data.nbytes;
> +
> +	/* Default value of water mark level is 8 bytes. */
> +	wm_size = 8;
> +
> +	while (len > 0) {
> +		size = len / wm_size;
> +
> +		for (i = 0; i < size; i++) {
> +			/* Wait for RXFIFO available */
> +			ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> +						   FSPI_INTR_IPRXWA, 0,
> +						   POLL_TOUT, true);
> +			WARN_ON(ret);
> +
> +			j = 0;
> +			tmp_size = wm_size;
> +			while (tmp_size > 0) {
> +				tmp = 0;
> +				tmp = fspi_readl(f, base + FSPI_RFDR + j * 4);
> +				memcpy(buf, &tmp, 4);
> +				tmp_size -= 4;
> +				j++;
> +				buf += 4;
> +			}
> +			/* move the FIFO pointer */
> +			fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
> +			len -= wm_size;
> +		}
> +
> +		size = len % wm_size;
> +
> +		j = 0;
> +		if (size) {
> +			/* Wait for RXFIFO available */
> +			ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> +						   FSPI_INTR_IPRXWA, 0,
> +						   POLL_TOUT, true);
> +			WARN_ON(ret);
> +
> +			while (len > 0) {
> +				tmp = 0;
> +				size = (len < 4) ? len : 4;
> +				tmp = fspi_readl(f, base + FSPI_RFDR + j * 4);
> +				memcpy(buf, &tmp, size);
> +				len -= size;
> +				j++;
> +				buf += size;
> +			}
> +		}
> +
> +		/* invalid the RXFIFO */
> +		fspi_writel(f, FSPI_IPRXFCR_CLR, base + FSPI_IPRXFCR);
> +		/* move the FIFO pointer */
> +		fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
> +	}

Same here. I think this is overly complicated.

> +}
> +
> +static int nxp_fspi_do_op(struct nxp_fspi *f, const struct spi_mem_op *op)
> +{
> +	void __iomem *base = f->iobase;
> +	int seqnum = 0;
> +	int err = 0;
> +	u32 reg;
> +
> +	reg = fspi_readl(f, base + FSPI_IPRXFCR);
> +	/* invalid RXFIFO first */
> +	reg &= ~FSPI_IPRXFCR_DMA_EN;
> +	reg = reg | FSPI_IPRXFCR_CLR;
> +	fspi_writel(f, reg, base + FSPI_IPRXFCR);
> +
> +	init_completion(&f->c);
> +
> +	fspi_writel(f, op->addr.val, base + FSPI_IPCR0);
> +	/*
> +	 * Always start the sequence at the same index since we update
> +	 * the LUT at each exec_op() call. And also specify the DATA
> +	 * length, since it's has not been specified in the LUT.
> +	 */
> +	fspi_writel(f, op->data.nbytes |
> +		 (SEQID_LUT << FSPI_IPCR1_SEQID_SHIFT) |
> +		 (seqnum << FSPI_IPCR1_SEQNUM_SHIFT),
> +		 base + FSPI_IPCR1);
> +
> +	/* Trigger the LUT now. */
> +	fspi_writel(f, FSPI_IPCMD_TRG, base + FSPI_IPCMD);
> +
> +	/* Wait for the interrupt. */
> +	if (!wait_for_completion_timeout(&f->c, msecs_to_jiffies(1000)))
> +		err = -ETIMEDOUT;
> +
> +	/* Invoke IP data read, if request is of data read. */
> +	if (!err && op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
> +		nxp_fspi_read_rxfifo(f, op);
> +
> +	return err;
> +}
> +
> +static int nxp_fspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
> +{
> +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
> +	int err = 0;
> +
> +	mutex_lock(&f->lock);
> +
> +	/* Wait for controller being ready. */
> +	err = fspi_readl_poll_tout(f, f->iobase + FSPI_STS0,
> +				   FSPI_STS0_ARB_IDLE, 1, POLL_TOUT, true);
> +	WARN_ON(err);
> +
> +	nxp_fspi_select_mem(f, mem->spi);
> +
> +	nxp_fspi_prepare_lut(f, op);
> +	/*
> +	 * If we have large chunks of data, we read them through the AHB bus
> +	 * by accessing the mapped memory. In all other cases we use
> +	 * IP commands to access the flash.
> +	 */
> +	if (op->data.nbytes > (f->devtype_data->rxfifo - 4) &&
> +	    op->data.dir == SPI_MEM_DATA_IN) {
> +		nxp_fspi_read_ahb(f, op);
> +	} else {
> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> +			nxp_fspi_fill_txfifo(f, op);
> +
> +		err = nxp_fspi_do_op(f, op);
> +
> +		/* Invalidate the data in the AHB buffer. */
> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> +			nxp_fspi_invalid(f);

E.g. in case of an erase operation or a NAND load page operation, the 
invalidation is not triggered, but flash/buffer contents have changed. 
So I'm not sure if this is enough...

> +	}
> +
> +	mutex_unlock(&f->lock);
> +
> +	return err;
> +}
> +
> +static int nxp_fspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
> +{
> +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
> +
> +	if (op->data.dir == SPI_MEM_DATA_OUT) {
> +		if (op->data.nbytes > f->devtype_data->txfifo)
> +			op->data.nbytes = f->devtype_data->txfifo;
> +	} else {
> +		if (op->data.nbytes > f->devtype_data->ahb_buf_size)
> +			op->data.nbytes = f->devtype_data->ahb_buf_size;
> +		else if (op->data.nbytes > (f->devtype_data->rxfifo - 4))
> +			op->data.nbytes = ALIGN_DOWN(op->data.nbytes, 8);

You are using the same alignments as in the QSPI driver. So AHB reads 
will happen in portions of ahb_buf_size, but you dont' stick to this 
when you map the memory. See above.

Regards,
Frieder

> +	}
> +
> +	return 0;
> +}
> +
> +static int nxp_fspi_default_setup(struct nxp_fspi *f)
> +{
> +	void __iomem *base = f->iobase;
> +	int ret, i;
> +	u32 reg;
> +
> +	/* disable and unprepare clock to avoid glitch pass to controller */
> +	nxp_fspi_clk_disable_unprep(f);
> +
> +	/* the default frequency, we will change it later if necessary. */
> +	ret = clk_set_rate(f->clk, 20000000);
> +	if (ret)
> +		return ret;
> +
> +	ret = nxp_fspi_clk_prep_enable(f);
> +	if (ret)
> +		return ret;
> +
> +	/* Reset the module */
> +	/* w1c register, wait unit clear */
> +	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
> +				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
> +	WARN_ON(ret);
> +
> +	/* Disable the module */
> +	fspi_writel(f, FSPI_MCR0_MDIS, base + FSPI_MCR0);
> +
> +	/* Reset the DLL register to default value */
> +	fspi_writel(f, FSPI_DLLACR_OVRDEN, base + FSPI_DLLACR);
> +	fspi_writel(f, FSPI_DLLBCR_OVRDEN, base + FSPI_DLLBCR);
> +
> +	/* enable module */
> +	fspi_writel(f, FSPI_MCR0_AHB_TIMEOUT(0xFF) | FSPI_MCR0_IP_TIMEOUT(0xFF),
> +		 base + FSPI_MCR0);
> +
> +	/*
> +	 * Disable same device enable bit and configure all slave devices
> +	 * independently.
> +	 */
> +	reg = fspi_readl(f, f->iobase + FSPI_MCR2);
> +	reg = reg & ~(FSPI_MCR2_SAMEDEVICEEN);
> +	fspi_writel(f, reg, base + FSPI_MCR2);
> +
> +	/* AHB configuration for access buffer 0~7. */
> +	for (i = 0; i < 7; i++)
> +		fspi_writel(f, 0, base + FSPI_AHBRX_BUF0CR0 + 4 * i);
> +
> +	/*
> +	 * Set ADATSZ with the maximum AHB buffer size to improve the read
> +	 * performance.
> +	 */
> +	fspi_writel(f, (f->devtype_data->ahb_buf_size / 8 |
> +		  FSPI_AHBRXBUF0CR7_PREF), base + FSPI_AHBRX_BUF7CR0);
> +
> +	/* prefetch and no start address alignment limitation */
> +	fspi_writel(f, FSPI_AHBCR_PREF_EN | FSPI_AHBCR_RDADDROPT,
> +		 base + FSPI_AHBCR);
> +
> +	/* AHB Read - Set lut sequence ID for all CS. */
> +	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA1CR2);
> +	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA2CR2);
> +	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB1CR2);
> +	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB2CR2);
> +
> +	f->selected = -1;
> +
> +	/* enable the interrupt */
> +	fspi_writel(f, FSPI_INTEN_IPCMDDONE, base + FSPI_INTEN);
> +
> +	return 0;
> +}
> +
> +static const struct spi_controller_mem_ops nxp_fspi_mem_ops = {
> +	.adjust_op_size = nxp_fspi_adjust_op_size,
> +	.supports_op = nxp_fspi_supports_op,
> +	.exec_op = nxp_fspi_exec_op,
> +};
> +
> +static int nxp_fspi_probe(struct platform_device *pdev)
> +{
> +	struct spi_controller *ctlr;
> +	struct device *dev = &pdev->dev;
> +	struct device_node *np = dev->of_node;
> +	struct resource *res;
> +	struct nxp_fspi *f;
> +	int ret;
> +
> +	ctlr = spi_alloc_master(&pdev->dev, sizeof(*f));
> +	if (!ctlr)
> +		return -ENOMEM;
> +
> +	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD |
> +			  SPI_TX_DUAL | SPI_TX_QUAD;
> +
> +	f = spi_controller_get_devdata(ctlr);
> +	f->dev = dev;
> +	f->devtype_data = of_device_get_match_data(dev);
> +	if (!f->devtype_data) {
> +		ret = -ENODEV;
> +		goto err_put_ctrl;
> +	}
> +
> +	platform_set_drvdata(pdev, f);
> +
> +	/* find the resources - configuration register address space */
> +	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_base");
> +	f->iobase = devm_ioremap_resource(dev, res);
> +	if (IS_ERR(f->iobase)) {
> +		ret = PTR_ERR(f->iobase);
> +		goto err_put_ctrl;
> +	}
> +
> +	/* find the resources - controller memory mapped space */
> +	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_mmap");
> +	f->ahb_addr = devm_ioremap_resource(dev, res);
> +	if (IS_ERR(f->ahb_addr)) {
> +		ret = PTR_ERR(f->ahb_addr);
> +		goto err_put_ctrl;
> +	}
> +
> +	/* assign memory mapped starting address and mapped size. */
> +	f->memmap_phy = res->start;
> +	f->memmap_phy_size = resource_size(res);
> +
> +	/* find the clocks */
> +	f->clk_en = devm_clk_get(dev, "fspi_en");
> +	if (IS_ERR(f->clk_en)) {
> +		ret = PTR_ERR(f->clk_en);
> +		goto err_put_ctrl;
> +	}
> +
> +	f->clk = devm_clk_get(dev, "fspi");
> +	if (IS_ERR(f->clk)) {
> +		ret = PTR_ERR(f->clk);
> +		goto err_put_ctrl;
> +	}
> +
> +	ret = nxp_fspi_clk_prep_enable(f);
> +	if (ret) {
> +		dev_err(dev, "can not enable the clock\n");
> +		goto err_put_ctrl;
> +	}
> +
> +	/* find the irq */
> +	ret = platform_get_irq(pdev, 0);
> +	if (ret < 0) {
> +		dev_err(dev, "failed to get the irq: %d\n", ret);
> +		goto err_disable_clk;
> +	}
> +
> +	ret = devm_request_irq(dev, ret,
> +			nxp_fspi_irq_handler, 0, pdev->name, f);
> +	if (ret) {
> +		dev_err(dev, "failed to request irq: %d\n", ret);
> +		goto err_disable_clk;
> +	}
> +
> +	mutex_init(&f->lock);
> +
> +	ctlr->bus_num = -1;
> +	ctlr->num_chipselect = NXP_FSPI_MAX_CHIPSELECT;
> +	ctlr->mem_ops = &nxp_fspi_mem_ops;
> +
> +	nxp_fspi_default_setup(f);
> +
> +	ctlr->dev.of_node = np;
> +
> +	ret = spi_register_controller(ctlr);
> +	if (ret)
> +		goto err_destroy_mutex;
> +
> +	return 0;
> +
> +err_destroy_mutex:
> +	mutex_destroy(&f->lock);
> +
> +err_disable_clk:
> +	nxp_fspi_clk_disable_unprep(f);
> +
> +err_put_ctrl:
> +	spi_controller_put(ctlr);
> +
> +	dev_err(dev, "NXP FSPI probe failed\n");
> +	return ret;
> +}
> +
> +static int nxp_fspi_remove(struct platform_device *pdev)
> +{
> +	struct nxp_fspi *f = platform_get_drvdata(pdev);
> +
> +	/* disable the hardware */
> +	fspi_writel(f, FSPI_MCR0_MDIS, f->iobase + FSPI_MCR0);
> +
> +	nxp_fspi_clk_disable_unprep(f);
> +
> +	mutex_destroy(&f->lock);
> +
> +	return 0;
> +}
> +
> +static int nxp_fspi_suspend(struct device *dev)
> +{
> +	return 0;
> +}
> +
> +static int nxp_fspi_resume(struct device *dev)
> +{
> +	struct nxp_fspi *f = dev_get_drvdata(dev);
> +
> +	nxp_fspi_default_setup(f);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id nxp_fspi_dt_ids[] = {
> +	{ .compatible = "nxp,lx2160a-fspi", .data = (void *)&lx2160a_data, },
> +	{ /* sentinel */ }
> +};
> +MODULE_DEVICE_TABLE(of, nxp_fspi_dt_ids);
> +
> +static const struct dev_pm_ops nxp_fspi_pm_ops = {
> +	.suspend	= nxp_fspi_suspend,
> +	.resume		= nxp_fspi_resume,
> +};
> +
> +static struct platform_driver nxp_fspi_driver = {
> +	.driver = {
> +		.name	= "nxp-fspi",
> +		.of_match_table = nxp_fspi_dt_ids,
> +		.pm =   &nxp_fspi_pm_ops,
> +	},
> +	.probe          = nxp_fspi_probe,
> +	.remove		= nxp_fspi_remove,
> +};
> +module_platform_driver(nxp_fspi_driver);
> +
> +MODULE_DESCRIPTION("NXP FSPI Controller Driver");
> +MODULE_AUTHOR("NXP Semiconductor");
> +MODULE_LICENSE("GPL v2");
>
Yogesh Narayan Gaur Dec. 10, 2018, 9:41 a.m. UTC | #2
Hi Frieder,

Thanks for the review. Please find my comments inline.

> -----Original Message-----
> From: Schrempf Frieder [mailto:frieder.schrempf@kontron.de]
> Sent: Thursday, December 6, 2018 2:53 PM
> To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>; linux-
> mtd@lists.infradead.org; boris.brezillon@bootlin.com; marek.vasut@gmail.com;
> broonie@kernel.org; linux-spi@vger.kernel.org; devicetree@vger.kernel.org
> Cc: robh@kernel.org; mark.rutland@arm.com; shawnguo@kernel.org; linux-
> arm-kernel@lists.infradead.org; computersforpeace@gmail.com; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI controller
> 
> Hi Yogesh,
> 
> I've had a closer look at your v5. See my comments below.
> 
> On 16.11.18 12:13, Yogesh Narayan Gaur wrote:
> > - Add driver for NXP FlexSPI host controller
> >
> > (0) What is the FlexSPI controller?
> >   FlexSPI is a flexsible SPI host controller which supports two SPI
> >   channels and up to 4 external devices. Each channel supports
> >   Single/Dual/Quad/Octal mode data transfer (1/2/4/8 bidirectional
> >   data lines) i.e. FlexSPI acts as an interface to external devices,
> >   maximum 4, each with up to 8 bidirectional data lines.
> >
> >   It uses new SPI memory interface of the SPI framework to issue
> >   flash memory operations to up to four connected flash
> >   devices (2 buses with 2 CS each).
> >
> > (1) Tested this driver with the mtd_debug and JFFS2 filesystem utility
> >   on NXP LX2160ARDB and LX2160AQDS targets.
> >   LX2160ARDB is having two NOR slave device connected on single bus A
> >   i.e. A0 and A1 (CS0 and CS1).
> >   LX2160AQDS is having two NOR slave device connected on separate buses
> >   one flash on A0 and second on B1 i.e. (CS0 and CS3).
> >   Verified this driver on following SPI NOR flashes:
> >      Micron, mt35xu512ab, [Read - 1 bit mode]
> >      Cypress, s25fl512s, [Read - 1/2/4 bit mode]
> >
> > Signed-off-by: Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
> > ---
> > Changes for v5:
> > - Rebase on top of v4.20-rc2
> > - Modified fspi_readl_poll_tout() as per review comments
> > - Arrange header file in alphabetical order
> > - Removed usage of read()/write() function callback pointer
> > - Add support for 1 and 2 byte address length
> > - Change Frieder e-mail to new e-mail address Changes for v4:
> > - Incorporate Boris review comments
> >    * Use readl_poll_timeout() instead of busy looping.
> >    * Re-define register masking as per comment.
> >    * Drop fspi_devtype enum.
> > Changes for v3:
> > - Added endianness flag in platform specific structure instead of DTS.
> > - Modified nxp_fspi_read_ahb(), removed remapping code.
> > - Added Boris and Frieder as Author and provided reference of
> > spi-fsl-qspi.c Changes for v2:
> > - Incorporated Boris review comments.
> > - Remove dependency of driver over connected flash device size.
> > - Modified the logic to select requested CS.
> > - Remove SPI-Octal Macros.
> >
> >   drivers/spi/Kconfig        |   10 +
> >   drivers/spi/Makefile       |    1 +
> >   drivers/spi/spi-nxp-fspi.c | 1145
> ++++++++++++++++++++++++++++++++++++++++++++
> >   3 files changed, 1156 insertions(+)
> >   create mode 100644 drivers/spi/spi-nxp-fspi.c
> >
> > diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index
> > 7d3a5c9..36630a1 100644
> > --- a/drivers/spi/Kconfig
> > +++ b/drivers/spi/Kconfig
> > @@ -259,6 +259,16 @@ config SPI_FSL_LPSPI
> >   	help
> >   	  This enables Freescale i.MX LPSPI controllers in master mode.
> >
> > +config SPI_NXP_FLEXSPI
> > +	tristate "NXP Flex SPI controller"
> > +	depends on ARCH_LAYERSCAPE || HAS_IOMEM
> > +	help
> > +	  This enables support for the Flex SPI controller in master mode.
> > +	  Up to four slave devices can be connected on two buses with two
> > +	  chipselects each.
> > +	  This controller does not support generic SPI messages and only
> > +	  supports the high-level SPI memory interface.
> > +
> >   config SPI_GPIO
> >   	tristate "GPIO-based bitbanging SPI Master"
> >   	depends on GPIOLIB || COMPILE_TEST
> > diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index
> > 3575205..55fec5c 100644
> > --- a/drivers/spi/Makefile
> > +++ b/drivers/spi/Makefile
> > @@ -60,6 +60,7 @@ obj-$(CONFIG_SPI_MPC52xx)		+= spi-
> mpc52xx.o
> >   obj-$(CONFIG_SPI_MT65XX)                += spi-mt65xx.o
> >   obj-$(CONFIG_SPI_MXS)			+= spi-mxs.o
> >   obj-$(CONFIG_SPI_NUC900)		+= spi-nuc900.o
> > +obj-$(CONFIG_SPI_NXP_FLEXSPI)		+= spi-nxp-fspi.o
> >   obj-$(CONFIG_SPI_OC_TINY)		+= spi-oc-tiny.o
> >   spi-octeon-objs				:= spi-cavium.o spi-cavium-
> octeon.o
> >   obj-$(CONFIG_SPI_OCTEON)		+= spi-octeon.o
> > diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
> > new file mode 100644 index 0000000..a35013b
> > --- /dev/null
> > +++ b/drivers/spi/spi-nxp-fspi.c
> > @@ -0,0 +1,1145 @@
> > +// SPDX-License-Identifier: GPL-2.0+
> > +
> > +/*
> > + * NXP FlexSPI(FSPI) controller driver.
> > + *
> > + * Copyright 2018 NXP.
> > + *
> > + * FlexSPI is a flexsible SPI host controller which supports two SPI
> > + * channels and up to 4 external devices. Each channel supports
> > + * Single/Dual/Quad/Octal mode data transfer (1/2/4/8 bidirectional
> > + * data lines).
> > + *
> > + * FlexSPI controller is driven by the LUT(Look-up Table) registers
> > + * LUT registers are a look-up-table for sequences of instructions.
> > + * A valid sequence consists of four LUT registers.
> > + * Maximum 32 LUT sequences can be programmed simultaneously.
> > + *
> > + * LUTs are being created at run-time based on the commands passed
> > + * from the spi-mem framework, thus using single LUT index.
> > + *
> > + * Software triggered Flash read/write access by IP Bus.
> > + *
> > + * Memory mapped read access by AHB Bus.
> > + *
> > + * Based on SPI MEM interface and spi-fsl-qspi.c driver.
> > + *
> > + * Author:
> > + *     Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
> > + *     Boris Brezillion <boris.brezillon@bootlin.com>
> > + *     Frieder Schrempf <frieder.schrempf@kontron.de>
> > + */
> > +
> > +#include <linux/bitops.h>
> > +#include <linux/clk.h>
> > +#include <linux/completion.h>
> > +#include <linux/delay.h>
> > +#include <linux/err.h>
> > +#include <linux/errno.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/iopoll.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/of.h>
> > +#include <linux/of_device.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/pm_qos.h>
> > +#include <linux/sizes.h>
> > +
> > +#include <linux/spi/spi.h>
> > +#include <linux/spi/spi-mem.h>
> > +
> > +/*
> > + * The driver only uses one single LUT entry, that is updated on
> > + * each call of exec_op(). Index 0 is preset at boot with a basic
> > + * read operation, so let's use the last entry (31).
> > + */
> > +#define	SEQID_LUT			31
> > +
> > +/* Registers used by the driver */
> > +#define FSPI_MCR0			0x00
> > +#define FSPI_MCR0_AHB_TIMEOUT(x)	((x) << 24)
> > +#define FSPI_MCR0_IP_TIMEOUT(x)		((x) << 16)
> > +#define FSPI_MCR0_LEARN_EN		BIT(15)
> > +#define FSPI_MCR0_SCRFRUN_EN		BIT(14)
> > +#define FSPI_MCR0_OCTCOMB_EN		BIT(13)
> > +#define FSPI_MCR0_DOZE_EN		BIT(12)
> > +#define FSPI_MCR0_HSEN			BIT(11)
> > +#define FSPI_MCR0_SERCLKDIV		BIT(8)
> > +#define FSPI_MCR0_ATDF_EN		BIT(7)
> > +#define FSPI_MCR0_ARDF_EN		BIT(6)
> > +#define FSPI_MCR0_RXCLKSRC(x)		((x) << 4)
> > +#define FSPI_MCR0_END_CFG(x)		((x) << 2)
> > +#define FSPI_MCR0_MDIS			BIT(1)
> > +#define FSPI_MCR0_SWRST			BIT(0)
> > +
> > +#define FSPI_MCR1			0x04
> > +#define FSPI_MCR1_SEQ_TIMEOUT(x)	((x) << 16)
> > +#define FSPI_MCR1_AHB_TIMEOUT(x)	(x)
> > +
> > +#define FSPI_MCR2			0x08
> > +#define FSPI_MCR2_IDLE_WAIT(x)		((x) << 24)
> > +#define FSPI_MCR2_SAMEDEVICEEN		BIT(15)
> > +#define FSPI_MCR2_CLRLRPHS		BIT(14)
> > +#define FSPI_MCR2_ABRDATSZ		BIT(8)
> > +#define FSPI_MCR2_ABRLEARN		BIT(7)
> > +#define FSPI_MCR2_ABR_READ		BIT(6)
> > +#define FSPI_MCR2_ABRWRITE		BIT(5)
> > +#define FSPI_MCR2_ABRDUMMY		BIT(4)
> > +#define FSPI_MCR2_ABR_MODE		BIT(3)
> > +#define FSPI_MCR2_ABRCADDR		BIT(2)
> > +#define FSPI_MCR2_ABRRADDR		BIT(1)
> > +#define FSPI_MCR2_ABR_CMD		BIT(0)
> > +
> > +#define FSPI_AHBCR			0x0c
> > +#define FSPI_AHBCR_RDADDROPT		BIT(6)
> > +#define FSPI_AHBCR_PREF_EN		BIT(5)
> > +#define FSPI_AHBCR_BUFF_EN		BIT(4)
> > +#define FSPI_AHBCR_CACH_EN		BIT(3)
> > +#define FSPI_AHBCR_CLRTXBUF		BIT(2)
> > +#define FSPI_AHBCR_CLRRXBUF		BIT(1)
> > +#define FSPI_AHBCR_PAR_EN		BIT(0)
> > +
> > +#define FSPI_INTEN			0x10
> > +#define FSPI_INTEN_SCLKSBWR		BIT(9)
> > +#define FSPI_INTEN_SCLKSBRD		BIT(8)
> > +#define FSPI_INTEN_DATALRNFL		BIT(7)
> > +#define FSPI_INTEN_IPTXWE		BIT(6)
> > +#define FSPI_INTEN_IPRXWA		BIT(5)
> > +#define FSPI_INTEN_AHBCMDERR		BIT(4)
> > +#define FSPI_INTEN_IPCMDERR		BIT(3)
> > +#define FSPI_INTEN_AHBCMDGE		BIT(2)
> > +#define FSPI_INTEN_IPCMDGE		BIT(1)
> > +#define FSPI_INTEN_IPCMDDONE		BIT(0)
> > +
> > +#define FSPI_INTR			0x14
> > +#define FSPI_INTR_SCLKSBWR		BIT(9)
> > +#define FSPI_INTR_SCLKSBRD		BIT(8)
> > +#define FSPI_INTR_DATALRNFL		BIT(7)
> > +#define FSPI_INTR_IPTXWE		BIT(6)
> > +#define FSPI_INTR_IPRXWA		BIT(5)
> > +#define FSPI_INTR_AHBCMDERR		BIT(4)
> > +#define FSPI_INTR_IPCMDERR		BIT(3)
> > +#define FSPI_INTR_AHBCMDGE		BIT(2)
> > +#define FSPI_INTR_IPCMDGE		BIT(1)
> > +#define FSPI_INTR_IPCMDDONE		BIT(0)
> > +
> > +#define FSPI_LUTKEY			0x18
> > +#define FSPI_LUTKEY_VALUE		0x5AF05AF0
> > +
> > +#define FSPI_LCKCR			0x1C
> > +
> > +#define FSPI_LCKER_LOCK			0x1
> > +#define FSPI_LCKER_UNLOCK		0x2
> > +
> > +#define FSPI_BUFXCR_INVALID_MSTRID	0xE
> > +#define FSPI_AHBRX_BUF0CR0		0x20
> > +#define FSPI_AHBRX_BUF1CR0		0x24
> > +#define FSPI_AHBRX_BUF2CR0		0x28
> > +#define FSPI_AHBRX_BUF3CR0		0x2C
> > +#define FSPI_AHBRX_BUF4CR0		0x30
> > +#define FSPI_AHBRX_BUF5CR0		0x34
> > +#define FSPI_AHBRX_BUF6CR0		0x38
> > +#define FSPI_AHBRX_BUF7CR0		0x3C
> > +#define FSPI_AHBRXBUF0CR7_PREF		BIT(31)
> > +
> > +#define FSPI_AHBRX_BUF0CR1		0x40
> > +#define FSPI_AHBRX_BUF1CR1		0x44
> > +#define FSPI_AHBRX_BUF2CR1		0x48
> > +#define FSPI_AHBRX_BUF3CR1		0x4C
> > +#define FSPI_AHBRX_BUF4CR1		0x50
> > +#define FSPI_AHBRX_BUF5CR1		0x54
> > +#define FSPI_AHBRX_BUF6CR1		0x58
> > +#define FSPI_AHBRX_BUF7CR1		0x5C
> > +
> > +#define FSPI_FLSHA1CR0			0x60
> > +#define FSPI_FLSHA2CR0			0x64
> > +#define FSPI_FLSHB1CR0			0x68
> > +#define FSPI_FLSHB2CR0			0x6C
> > +#define FSPI_FLSHXCR0_SZ_KB		10
> > +#define FSPI_FLSHXCR0_SZ(x)		((x) >> FSPI_FLSHXCR0_SZ_KB)
> > +
> > +#define FSPI_FLSHA1CR1			0x70
> > +#define FSPI_FLSHA2CR1			0x74
> > +#define FSPI_FLSHB1CR1			0x78
> > +#define FSPI_FLSHB2CR1			0x7C
> > +#define FSPI_FLSHXCR1_CSINTR(x)		((x) << 16)
> > +#define FSPI_FLSHXCR1_CAS(x)		((x) << 11)
> > +#define FSPI_FLSHXCR1_WA		BIT(10)
> > +#define FSPI_FLSHXCR1_TCSH(x)		((x) << 5)
> > +#define FSPI_FLSHXCR1_TCSS(x)		(x)
> > +
> > +#define FSPI_FLSHA1CR2			0x80
> > +#define FSPI_FLSHA2CR2			0x84
> > +#define FSPI_FLSHB1CR2			0x88
> > +#define FSPI_FLSHB2CR2			0x8C
> > +#define FSPI_FLSHXCR2_CLRINSP		BIT(24)
> > +#define FSPI_FLSHXCR2_AWRWAIT		BIT(16)
> > +#define FSPI_FLSHXCR2_AWRSEQN_SHIFT	13
> > +#define FSPI_FLSHXCR2_AWRSEQI_SHIFT	8
> > +#define FSPI_FLSHXCR2_ARDSEQN_SHIFT	5
> > +#define FSPI_FLSHXCR2_ARDSEQI_SHIFT	0
> > +
> > +#define FSPI_IPCR0			0xA0
> > +
> > +#define FSPI_IPCR1			0xA4
> > +#define FSPI_IPCR1_IPAREN		BIT(31)
> > +#define FSPI_IPCR1_SEQNUM_SHIFT		24
> > +#define FSPI_IPCR1_SEQID_SHIFT		16
> > +#define FSPI_IPCR1_IDATSZ(x)		(x)
> > +
> > +#define FSPI_IPCMD			0xB0
> > +#define FSPI_IPCMD_TRG			BIT(0)
> > +
> > +#define FSPI_DLPR			0xB4
> > +
> > +#define FSPI_IPRXFCR			0xB8
> > +#define FSPI_IPRXFCR_CLR		BIT(0)
> > +#define FSPI_IPRXFCR_DMA_EN		BIT(1)
> > +#define FSPI_IPRXFCR_WMRK(x)		((x) << 2)
> > +
> > +#define FSPI_IPTXFCR			0xBC
> > +#define FSPI_IPTXFCR_CLR		BIT(0)
> > +#define FSPI_IPTXFCR_DMA_EN		BIT(1)
> > +#define FSPI_IPTXFCR_WMRK(x)		((x) << 2)
> > +
> > +#define FSPI_DLLACR			0xC0
> > +#define FSPI_DLLACR_OVRDEN		BIT(8)
> > +
> > +#define FSPI_DLLBCR			0xC4
> > +#define FSPI_DLLBCR_OVRDEN		BIT(8)
> > +
> > +#define FSPI_STS0			0xE0
> > +#define FSPI_STS0_DLPHB(x)		((x) << 8)
> > +#define FSPI_STS0_DLPHA(x)		((x) << 4)
> > +#define FSPI_STS0_CMD_SRC(x)		((x) << 2)
> > +#define FSPI_STS0_ARB_IDLE		BIT(1)
> > +#define FSPI_STS0_SEQ_IDLE		BIT(0)
> > +
> > +#define FSPI_STS1			0xE4
> > +#define FSPI_STS1_IP_ERRCD(x)		((x) << 24)
> > +#define FSPI_STS1_IP_ERRID(x)		((x) << 16)
> > +#define FSPI_STS1_AHB_ERRCD(x)		((x) << 8)
> > +#define FSPI_STS1_AHB_ERRID(x)		(x)
> > +
> > +#define FSPI_AHBSPNST			0xEC
> > +#define FSPI_AHBSPNST_DATLFT(x)		((x) << 16)
> > +#define FSPI_AHBSPNST_BUFID(x)		((x) << 1)
> > +#define FSPI_AHBSPNST_ACTIVE		BIT(0)
> > +
> > +#define FSPI_IPRXFSTS			0xF0
> > +#define FSPI_IPRXFSTS_RDCNTR(x)		((x) << 16)
> > +#define FSPI_IPRXFSTS_FILL(x)		(x)
> > +
> > +#define FSPI_IPTXFSTS			0xF4
> > +#define FSPI_IPTXFSTS_WRCNTR(x)		((x) << 16)
> > +#define FSPI_IPTXFSTS_FILL(x)		(x)
> > +
> > +#define FSPI_RFDR			0x100
> > +#define FSPI_TFDR			0x180
> > +
> > +#define FSPI_LUT_BASE			0x200
> > +#define FSPI_LUT_OFFSET			(SEQID_LUT * 4 * 4)
> > +#define FSPI_LUT_REG(idx) \
> > +	(FSPI_LUT_BASE + FSPI_LUT_OFFSET + (idx) * 4)
> > +
> > +/* register map end */
> > +
> > +/* Instruction set for the LUT register. */
> > +#define LUT_STOP			0x00
> > +#define LUT_CMD				0x01
> > +#define LUT_ADDR			0x02
> > +#define LUT_CADDR_SDR			0x03
> > +#define LUT_MODE			0x04
> > +#define LUT_MODE2			0x05
> > +#define LUT_MODE4			0x06
> > +#define LUT_MODE8			0x07
> > +#define LUT_NXP_WRITE			0x08
> > +#define LUT_NXP_READ			0x09
> > +#define LUT_LEARN_SDR			0x0A
> > +#define LUT_DATSZ_SDR			0x0B
> > +#define LUT_DUMMY			0x0C
> > +#define LUT_DUMMY_RWDS_SDR		0x0D
> > +#define LUT_JMP_ON_CS			0x1F
> > +#define LUT_CMD_DDR			0x21
> > +#define LUT_ADDR_DDR			0x22
> > +#define LUT_CADDR_DDR			0x23
> > +#define LUT_MODE_DDR			0x24
> > +#define LUT_MODE2_DDR			0x25
> > +#define LUT_MODE4_DDR			0x26
> > +#define LUT_MODE8_DDR			0x27
> > +#define LUT_WRITE_DDR			0x28
> > +#define LUT_READ_DDR			0x29
> > +#define LUT_LEARN_DDR			0x2A
> > +#define LUT_DATSZ_DDR			0x2B
> > +#define LUT_DUMMY_DDR			0x2C
> > +#define LUT_DUMMY_RWDS_DDR		0x2D
> > +
> > +/*
> > + * Calculate number of required PAD bits for LUT register.
> > + *
> > + * The pad stands for the number of IO lines [0:7].
> > + * For example, the octal read needs eight IO lines,
> > + * so you should use LUT_PAD(8). This macro
> > + * returns 3 i.e. use eight (2^3) IP lines for read.
> > + */
> > +#define LUT_PAD(x) (fls(x) - 1)
> > +
> > +/*
> > + * Macro for constructing the LUT entries with the following
> > + * register layout:
> > + *
> > + *  ---------------------------------------------------
> > + *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
> > + *  ---------------------------------------------------
> > + */
> > +#define PAD_SHIFT		8
> > +#define INSTR_SHIFT		10
> > +#define OPRND_SHIFT		16
> > +
> > +/* Macros for constructing the LUT register. */
> > +#define LUT_DEF(idx, ins, pad, opr)			  \
> > +	((((ins) << INSTR_SHIFT) | ((pad) << PAD_SHIFT) | \
> > +	(opr)) << (((idx) % 2) * OPRND_SHIFT))
> > +
> > +/* Operands for the LUT register. */
> > +#define ADDR8BIT		0x08
> > +#define ADDR16BIT		0x10
> > +#define ADDR24BIT		0x18
> > +#define ADDR32BIT		0x20
> 
> You can drop these ADDRXBIT definitions, see below...
> 
Ok, would drop in next version.

> > +
> > +#define POLL_TOUT		5000
> > +#define NXP_FSPI_MAX_CHIPSELECT		4
> > +
> > +struct nxp_fspi_devtype_data {
> > +	unsigned int rxfifo;
> > +	unsigned int txfifo;
> > +	unsigned int ahb_buf_size;
> > +	unsigned int quirks;
> > +	bool little_endian;
> > +};
> > +
> > +static const struct nxp_fspi_devtype_data lx2160a_data = {
> > +	.rxfifo = SZ_512,       /* (64  * 64 bits)  */
> > +	.txfifo = SZ_1K,        /* (128 * 64 bits)  */
> > +	.ahb_buf_size = SZ_2K,  /* (256 * 64 bits)  */
> > +	.quirks = 0,
> > +	.little_endian = true,  /* little-endian    */
> > +};
> > +
> > +struct nxp_fspi {
> > +	void __iomem *iobase;
> > +	void __iomem *ahb_addr;
> > +	u32 memmap_phy;
> > +	u32 memmap_phy_size;
> > +	struct clk *clk, *clk_en;
> > +	struct device *dev;
> > +	struct completion c;
> > +	const struct nxp_fspi_devtype_data *devtype_data;
> > +	struct mutex lock;
> > +	struct pm_qos_request pm_qos_req;
> > +	int selected;
> > +};
> > +
> > +/*
> > + * R/W functions for big- or little-endian registers:
> > + * The FSPI controller's endianness is independent of
> > + * the CPU core's endianness. So far, although the CPU
> > + * core is little-endian the FSPI controller can use
> > + * big-endian or little-endian.
> > + */
> > +static void fspi_writel(struct nxp_fspi *f, u32 val, void __iomem
> > +*addr) {
> > +	if (f->devtype_data->little_endian)
> > +		iowrite32(val, addr);
> > +	else
> > +		iowrite32be(val, addr);
> > +}
> > +
> > +static u32 fspi_readl(struct nxp_fspi *f, void __iomem *addr) {
> > +	if (f->devtype_data->little_endian)
> > +		return ioread32(addr);
> > +	else
> > +		return ioread32be(addr);
> > +}
> > +
> > +static irqreturn_t nxp_fspi_irq_handler(int irq, void *dev_id) {
> > +	struct nxp_fspi *f = dev_id;
> > +	u32 reg;
> > +
> > +	/* clear interrupt */
> > +	reg = fspi_readl(f, f->iobase + FSPI_INTR);
> > +	fspi_writel(f, FSPI_INTR_IPCMDDONE, f->iobase + FSPI_INTR);
> > +
> > +	if (reg & FSPI_INTR_IPCMDDONE)
> > +		complete(&f->c);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> > +static int nxp_fspi_check_buswidth(struct nxp_fspi *f, u8 width) {
> > +	switch (width) {
> > +	case 1:
> > +	case 2:
> > +	case 4:
> > +	case 8:
> > +		return 0;
> > +	}
> > +
> > +	return -ENOTSUPP;
> > +}
> > +
> > +static bool nxp_fspi_supports_op(struct spi_mem *mem,
> > +				 const struct spi_mem_op *op)
> > +{
> > +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
> > +	int ret;
> > +
> > +	ret = nxp_fspi_check_buswidth(f, op->cmd.buswidth);
> > +
> > +	if (op->addr.nbytes)
> > +		ret |= nxp_fspi_check_buswidth(f, op->addr.buswidth);
> > +
> > +	if (op->dummy.nbytes)
> > +		ret |= nxp_fspi_check_buswidth(f, op->dummy.buswidth);
> > +
> > +	if (op->data.nbytes)
> > +		ret |= nxp_fspi_check_buswidth(f, op->data.buswidth);
> > +
> > +	if (ret)
> > +		return false;
> > +
> > +	/*
> > +	 * The number of instructions needed for the op, needs
> > +	 * to fit into a single LUT entry.
> > +	 */
> > +	if (op->addr.nbytes +
> > +	   (op->dummy.nbytes ? 1:0) +
> > +	   (op->data.nbytes ? 1:0) > 6)
> > +		return false;
> > +
> > +	/* Max 64 dummy clock cycles supported */
> > +	if (op->dummy.buswidth &&
> > +	    (op->dummy.nbytes * 8 / op->dummy.buswidth > 64))
> > +		return false;
> > +
> > +	/* Max data length, check controller limits and alignment */
> > +	if (op->data.dir == SPI_MEM_DATA_IN &&
> > +	    (op->data.nbytes > f->devtype_data->ahb_buf_size ||
> > +	     (op->data.nbytes > f->devtype_data->rxfifo - 4 &&
> > +	      !IS_ALIGNED(op->data.nbytes, 8))))
> > +		return false;
> > +
> > +	if (op->data.dir == SPI_MEM_DATA_OUT &&
> > +	    op->data.nbytes > f->devtype_data->txfifo)
> > +		return false;
> > +
> > +	return true;
> > +}
> > +
> > +/* Instead of busy looping invoke readl_poll_timeout functionality.
> > +*/ static int fspi_readl_poll_tout(struct nxp_fspi *f, void __iomem *base,
> > +				u32 mask, u32 delay_us,
> > +				u32 timeout_us, bool condition)
> > +{
> > +	u32 reg;
> > +
> > +	if (!f->devtype_data->little_endian)
> > +		mask = (u32)cpu_to_be32(mask);
> > +
> > +	if (condition)
> > +		return readl_poll_timeout(base, reg, (reg & mask),
> > +					  delay_us, timeout_us);
> > +	else
> > +		return readl_poll_timeout(base, reg, !(reg & mask),
> > +					  delay_us, timeout_us);
> 
> I would rather use a local variable to store the condition:
> 
> bool c = condition ? (reg & mask):!(reg & mask);
> 
With these type of usage getting below warning messages.
 
drivers/spi/spi-nxp-fspi.c: In function ‘fspi_readl_poll_tout.isra.10.constprop’:
drivers/spi/spi-nxp-fspi.c:446:21: warning: ‘reg’ may be used uninitialized in this function [-Wmaybe-uninitialized]
  bool cn = c ? (reg & mask) : !(reg & mask);

If assign value to reg = 0xffffffff then timeout is start getting hit for False case and if assign value 0 then start getting timeout hit for true case.

I would rather not try to modify this function.

> return readl_poll_timeout(base, reg, c, delay_us, timeout_us);
> 
> > +}
> > +
> > +/*
> > + * If the slave device content being changed by Write/Erase, need to
> > + * invalidate the AHB buffer. This can be achieved by doing the reset
> > + * of controller after setting MCR0[SWRESET] bit.
> > + */
> > +static inline void nxp_fspi_invalid(struct nxp_fspi *f) {
> > +	u32 reg;
> > +	int ret;
> > +
> > +	reg = fspi_readl(f, f->iobase + FSPI_MCR0);
> > +	fspi_writel(f, reg | FSPI_MCR0_SWRST, f->iobase + FSPI_MCR0);
> > +
> > +	/* w1c register, wait unit clear */
> > +	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
> > +				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
> > +	WARN_ON(ret);
> > +}
> > +
> > +static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
> > +				 const struct spi_mem_op *op)
> > +{
> > +	void __iomem *base = f->iobase;
> > +	u32 lutval[4] = {};
> > +	int lutidx = 1, i;
> > +
> > +	/* cmd */
> > +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
> > +			     op->cmd.opcode);
> > +
> > +	/* addr bus width */
> > +	if (op->addr.nbytes) {
> > +		u32 addrlen = 0;
> > +
> > +		switch (op->addr.nbytes) {
> > +		case 1:
> > +			addrlen = ADDR8BIT;
> > +			break;
> > +		case 2:
> > +			addrlen = ADDR16BIT;
> > +			break;
> > +		case 3:
> > +			addrlen = ADDR24BIT;
> > +			break;
> > +		case 4:
> > +			addrlen = ADDR32BIT;
> > +			break;
> > +		default:
> > +			dev_err(f->dev, "In-correct address length\n");
> > +			return;
> > +		}
> 
> You don't need to validate op->addr.nbytes here, this is already done in
> nxp_fspi_supports_op().

Yes, I need to validate op->addr.nbytes else LUT would going to be programmed for 0 addrlen.
I have checked this on the target.

> 
> > +
> > +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
> > +					      LUT_PAD(op->addr.buswidth),
> > +					      addrlen);
> 
> You can also just remove the whole switch statement above and use this:
> 
> lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
> 			      LUT_PAD(op->addr.buswidth),
> 			      op->addr.nbytes * 8);
> 
Ok, would include in next version.

> > +		lutidx++;
> > +	}
> > +
> > +	/* dummy bytes, if needed */
> > +	if (op->dummy.nbytes) {
> > +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
> > +		/*
> > +		 * Due to FlexSPI controller limitation number of PAD for
> dummy
> > +		 * buswidth needs to be programmed as equal to data buswidth.
> > +		 */
> > +					      LUT_PAD(op->data.buswidth),
> > +					      op->dummy.nbytes * 8 /
> > +					      op->dummy.buswidth);
> > +		lutidx++;
> > +	}
> > +
> > +	/* read/write data bytes */
> > +	if (op->data.nbytes) {
> > +		lutval[lutidx / 2] |= LUT_DEF(lutidx,
> > +					      op->data.dir ==
> SPI_MEM_DATA_IN ?
> > +					      LUT_NXP_READ : LUT_NXP_WRITE,
> > +					      LUT_PAD(op->data.buswidth),
> > +					      0);
> > +		lutidx++;
> > +	}
> > +
> > +	/* stop condition. */
> > +	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
> > +
> > +	/* unlock LUT */
> > +	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
> > +	fspi_writel(f, FSPI_LCKER_UNLOCK, f->iobase + FSPI_LCKCR);
> > +
> > +	/* fill LUT */
> > +	for (i = 0; i < ARRAY_SIZE(lutval); i++)
> > +		fspi_writel(f, lutval[i], base + FSPI_LUT_REG(i));
> > +
> > +	dev_dbg(f->dev, "CMD[%x] lutval[0:%x \t 1:%x \t 2:%x \t 3:%x]\n",
> > +		op->cmd.opcode, lutval[0], lutval[1], lutval[2], lutval[3]);
> > +
> > +	/* lock LUT */
> > +	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
> > +	fspi_writel(f, FSPI_LCKER_LOCK, f->iobase + FSPI_LCKCR); }
> > +
> > +static int nxp_fspi_clk_prep_enable(struct nxp_fspi *f) {
> > +	int ret;
> > +
> > +	ret = clk_prepare_enable(f->clk_en);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = clk_prepare_enable(f->clk);
> > +	if (ret) {
> > +		clk_disable_unprepare(f->clk_en);
> > +		return ret;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void nxp_fspi_clk_disable_unprep(struct nxp_fspi *f) {
> > +	clk_disable_unprepare(f->clk);
> > +	clk_disable_unprepare(f->clk_en);
> > +}
> > +
> > +/*
> > + * In FlexSPI controller, flash access is based on value of
> > +FSPI_FLSHXXCR0
> > + * register and start base address of the slave device.
> > + *
> > + *							    (Higher address)
> > + *				--------    <-- FLSHB2CR0
> > + *				|  B2  |
> > + *				|      |
> > + *	B2 start address -->	--------    <-- FLSHB1CR0
> > + *				|  B1  |
> > + *				|      |
> > + *	B1 start address -->	--------    <-- FLSHA2CR0
> > + *				|  A2  |
> > + *				|      |
> > + *	A2 start address -->	--------    <-- FLSHA1CR0
> > + *				|  A1  |
> > + *				|      |
> > + *	A1 start address -->	--------		    (Lower address)
> > + *
> > + *
> > + * Start base address defines the starting address range for given CS
> > +and
> > + * FSPI_FLSHXXCR0 defines the size of the slave device connected at given CS.
> > + *
> > + * But, different targets are having different combinations of number
> > +of CS,
> > + * some targets only have single CS or two CS covering controller's
> > +full
> > + * memory mapped space area.
> > + * Thus, implementation is being done as independent of the size and
> > +number
> > + * of the connected slave device.
> > + * Assign controller memory mapped space size as the size to the
> > +connected
> > + * slave device.
> > + * Mark FLSHxxCR0 as zero initially and then assign value only to the
> > +selected
> > + * chip-select Flash configuration register.
> > + *
> > + * For e.g. to access CS2 (B1), FLSHB1CR0 register would be equal to
> > +the
> > + * memory mapped size of the controller.
> > + * Value for rest of the CS FLSHxxCR0 register would be zero.
> > + *
> > + */
> > +static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device
> > +*spi) {
> > +	unsigned long rate = spi->max_speed_hz;
> > +	int ret;
> > +	uint64_t size_kb;
> > +
> > +	/*
> > +	 * Return, if previously selected slave device is same as current
> > +	 * requested slave device.
> > +	 */
> > +	if (f->selected == spi->chip_select)
> > +		return;
> > +
> > +	/* Reset FLSHxxCR0 registers */
> > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA1CR0);
> > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA2CR0);
> > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB1CR0);
> > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB2CR0);
> > +
> > +	/* Assign controller memory mapped space as size, KBytes, of flash. */
> > +	size_kb = FSPI_FLSHXCR0_SZ(f->memmap_phy_size);
> 
Above description of this function, explains the reason for using memmap_phy_size.
This is not the arbitrary size, but the memory mapped size being assigned to the controller.

> You are still using memory of arbitrary size (memmap_phy_size) for mapping the
> flash. Why not use the same approach as in the QSPI driver and just map
> ahb_buf_size until we implement the dirmap API?
The approach which being used in QSPI driver didn't work here, I have tried with that.
In QSPI driver, while preparing LUT we are assigning read/write address in the LUT preparation and have to for some unknown hack have to provide macro for LUT_MODE instead of LUT_ADDR.
But this thing didn't work for FlexSPI.
I discussed with HW IP owner and they suggested only to use LUT_ADDR for specifying the address length of the command i.e. 3-byte or 4-byte address command (NOR) or 1-2 byte address command for NAND.

Thus, in LUT preparation we have assigned only the base address.
Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register then for read/write data beyond limit of ahb_buf_size offset I get data corruption.

Thus, for generic approach have assigned FSPI_FLSHXXCR0 equal to the memory mapped size to the controller. This would also not going to depend on the number of CS present on the target.

> You are already aligning the AHB reads for this in nxp_fspi_adjust_op_size().
> 
Yes, max read data size can be ahb_buf_size. Thus we need to check max read size with ahb_buf_size.

> > +
> > +	switch (spi->chip_select) {
> > +	case 0:
> > +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHA1CR0);
> > +		break;
> > +	case 1:
> > +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHA2CR0);
> > +		break;
> > +	case 2:
> > +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHB1CR0);
> > +		break;
> > +	case 3:
> > +		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHB2CR0);
> > +		break;
> > +	default:
> > +		dev_err(f->dev, "In-correct CS provided\n");
> > +		return;
> 
> You don't need to validate spi->chip_select here. This should never be invalid
> and if it is, something is really wrong and your check won't help.
Ok, would remove in next version.

> 
> > +	}
> > +
> > +	dev_dbg(f->dev, "Slave device [CS:%x] selected\n",
> > +spi->chip_select);
> > +
> > +	nxp_fspi_clk_disable_unprep(f);
> > +
> > +	ret = clk_set_rate(f->clk, rate);
> > +	if (ret)
> > +		return;
> > +
> > +	ret = nxp_fspi_clk_prep_enable(f);
> > +	if (ret)
> > +		return;
> 
> Missing newline line here.
Ok

> 
> > +	f->selected = spi->chip_select;
> > +}
> > +
> > +static void nxp_fspi_read_ahb(struct nxp_fspi *f, const struct
> > +spi_mem_op *op) {
> > +	u32 len = op->data.nbytes;
> > +
> > +	/* Read out the data directly from the AHB buffer. */
> > +	memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len); }
> > +
> > +static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
> > +				 const struct spi_mem_op *op)
> > +{
> > +	void __iomem *base = f->iobase;
> > +	int i, j, ret;
> > +	int size, tmp_size, wm_size;
> > +	u32 data = 0;
> > +	u32 *txbuf = (u32 *) op->data.buf.out;
> > +
> > +	/* clear the TX FIFO. */
> > +	fspi_writel(f, FSPI_IPTXFCR_CLR, base + FSPI_IPTXFCR);
> > +
> > +	/* Default value of water mark level is 8 bytes. */
> > +	wm_size = 8;
> > +	size = op->data.nbytes / wm_size;
> > +	for (i = 0; i < size; i++) {
> > +		/* Wait for TXFIFO empty */
> > +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > +					   FSPI_INTR_IPTXWE, 0,
> > +					   POLL_TOUT, true);
> > +		WARN_ON(ret);
> > +
> > +		j = 0;
> > +		tmp_size = wm_size;
> > +		while (tmp_size > 0) {
> > +			data = 0;
> > +			memcpy(&data, txbuf, 4);
> > +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> > +			tmp_size -= 4;
> > +			j++;
> > +			txbuf += 1;
> > +		}
> > +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> > +	}
> > +
> > +	size = op->data.nbytes % wm_size;
> > +	if (size) {
> > +		/* Wait for TXFIFO empty */
> > +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > +					   FSPI_INTR_IPTXWE, 0,
> > +					   POLL_TOUT, true);
> > +		WARN_ON(ret);
> > +
> > +		j = 0;
> > +		tmp_size = 0;
> > +		while (size > 0) {
> > +			data = 0;
> > +			tmp_size = (size < 4) ? size : 4;
> > +			memcpy(&data, txbuf, tmp_size);
> > +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> > +			size -= tmp_size;
> > +			j++;
> > +			txbuf += 1;
> > +		}
> > +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> > +	}
> 
> All these nested loops to fill the TX buffer and also the ones below to read the
> RX buffer look much more complicated than they should really be. Can you try to
> make this more readable?
Yes
> 
> Maybe something like this would work:
> 
> for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 8); i += 8) {
> 	/* Wait for TXFIFO empty */
> 	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> 				   FSPI_INTR_IPTXWE, 0,
> 				   POLL_TOUT, true);
> 
> 	fspi_writel(f, op->data.buf.out + i, base + FSPI_TFDR);
> 	fspi_writel(f, op->data.buf.out + i + 4, base + FSPI_TFDR + 4);
> 	fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR); }
With this above 2 lines we are hardcoding it for read/write with watermark size as 8 bytes.
Watermark size can be variable and depends on the value of IPRXFCR/IPTXFCR register with default value as 8 bytes
Thus, I would still prefer to use the internal for loop instead of 2 fspi_writel(...) for FSPI_TFDR and FSPI_TFDR + 4 register write commands.

> 
> if (i < op->data.nbytes) {
> 	u32 data = 0;
> 	int j;
> 	/* Wait for TXFIFO empty */
> 	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> 				   FSPI_INTR_IPTXWE, 0,
> 				   POLL_TOUT, true);
> 
> 	for (j = 0; j < ALIGN(op->data.nbytes - i, 4); j += 4) {
> 		memcpy(&data, op->data.buf.out + i + j, 4);
> 		fspi_writel(f, data, base + FSPI_TFDR + j);
> 	}
> 
> 	fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR); }
> 
> > +}
> > +
> > +static void nxp_fspi_read_rxfifo(struct nxp_fspi *f,
> > +			  const struct spi_mem_op *op)
> > +{
> > +	void __iomem *base = f->iobase;
> > +	int i, j;
> > +	int size, tmp_size, wm_size, ret;
> > +	u32 tmp = 0;
> > +	u8 *buf = op->data.buf.in;
> > +	u32 len = op->data.nbytes;
> > +
> > +	/* Default value of water mark level is 8 bytes. */
> > +	wm_size = 8;
> > +
> > +	while (len > 0) {
> > +		size = len / wm_size;
> > +
> > +		for (i = 0; i < size; i++) {
> > +			/* Wait for RXFIFO available */
> > +			ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > +						   FSPI_INTR_IPRXWA, 0,
> > +						   POLL_TOUT, true);
> > +			WARN_ON(ret);
> > +
> > +			j = 0;
> > +			tmp_size = wm_size;
> > +			while (tmp_size > 0) {
> > +				tmp = 0;
> > +				tmp = fspi_readl(f, base + FSPI_RFDR + j * 4);
> > +				memcpy(buf, &tmp, 4);
> > +				tmp_size -= 4;
> > +				j++;
> > +				buf += 4;
> > +			}
> > +			/* move the FIFO pointer */
> > +			fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
> > +			len -= wm_size;
> > +		}
> > +
> > +		size = len % wm_size;
> > +
> > +		j = 0;
> > +		if (size) {
> > +			/* Wait for RXFIFO available */
> > +			ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > +						   FSPI_INTR_IPRXWA, 0,
> > +						   POLL_TOUT, true);
> > +			WARN_ON(ret);
> > +
> > +			while (len > 0) {
> > +				tmp = 0;
> > +				size = (len < 4) ? len : 4;
> > +				tmp = fspi_readl(f, base + FSPI_RFDR + j * 4);
> > +				memcpy(buf, &tmp, size);
> > +				len -= size;
> > +				j++;
> > +				buf += size;
> > +			}
> > +		}
> > +
> > +		/* invalid the RXFIFO */
> > +		fspi_writel(f, FSPI_IPRXFCR_CLR, base + FSPI_IPRXFCR);
> > +		/* move the FIFO pointer */
> > +		fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
> > +	}
> 
> Same here. I think this is overly complicated.
> 
Yes, would do in next version.

> > +}
> > +
> > +static int nxp_fspi_do_op(struct nxp_fspi *f, const struct spi_mem_op
> > +*op) {
> > +	void __iomem *base = f->iobase;
> > +	int seqnum = 0;
> > +	int err = 0;
> > +	u32 reg;
> > +
> > +	reg = fspi_readl(f, base + FSPI_IPRXFCR);
> > +	/* invalid RXFIFO first */
> > +	reg &= ~FSPI_IPRXFCR_DMA_EN;
> > +	reg = reg | FSPI_IPRXFCR_CLR;
> > +	fspi_writel(f, reg, base + FSPI_IPRXFCR);
> > +
> > +	init_completion(&f->c);
> > +
> > +	fspi_writel(f, op->addr.val, base + FSPI_IPCR0);
> > +	/*
> > +	 * Always start the sequence at the same index since we update
> > +	 * the LUT at each exec_op() call. And also specify the DATA
> > +	 * length, since it's has not been specified in the LUT.
> > +	 */
> > +	fspi_writel(f, op->data.nbytes |
> > +		 (SEQID_LUT << FSPI_IPCR1_SEQID_SHIFT) |
> > +		 (seqnum << FSPI_IPCR1_SEQNUM_SHIFT),
> > +		 base + FSPI_IPCR1);
> > +
> > +	/* Trigger the LUT now. */
> > +	fspi_writel(f, FSPI_IPCMD_TRG, base + FSPI_IPCMD);
> > +
> > +	/* Wait for the interrupt. */
> > +	if (!wait_for_completion_timeout(&f->c, msecs_to_jiffies(1000)))
> > +		err = -ETIMEDOUT;
> > +
> > +	/* Invoke IP data read, if request is of data read. */
> > +	if (!err && op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
> > +		nxp_fspi_read_rxfifo(f, op);
> > +
> > +	return err;
> > +}
> > +
> > +static int nxp_fspi_exec_op(struct spi_mem *mem, const struct
> > +spi_mem_op *op) {
> > +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
> > +	int err = 0;
> > +
> > +	mutex_lock(&f->lock);
> > +
> > +	/* Wait for controller being ready. */
> > +	err = fspi_readl_poll_tout(f, f->iobase + FSPI_STS0,
> > +				   FSPI_STS0_ARB_IDLE, 1, POLL_TOUT, true);
> > +	WARN_ON(err);
> > +
> > +	nxp_fspi_select_mem(f, mem->spi);
> > +
> > +	nxp_fspi_prepare_lut(f, op);
> > +	/*
> > +	 * If we have large chunks of data, we read them through the AHB bus
> > +	 * by accessing the mapped memory. In all other cases we use
> > +	 * IP commands to access the flash.
> > +	 */
> > +	if (op->data.nbytes > (f->devtype_data->rxfifo - 4) &&
> > +	    op->data.dir == SPI_MEM_DATA_IN) {
> > +		nxp_fspi_read_ahb(f, op);
> > +	} else {
> > +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> > +			nxp_fspi_fill_txfifo(f, op);
> > +
> > +		err = nxp_fspi_do_op(f, op);
> > +
> > +		/* Invalidate the data in the AHB buffer. */
> > +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> > +			nxp_fspi_invalid(f);
> 
> E.g. in case of an erase operation or a NAND load page operation, the
> invalidation is not triggered, but flash/buffer contents have changed.
> So I'm not sure if this is enough...
Ok, would change this and have invalidate for all operations.

> 
> > +	}
> > +
> > +	mutex_unlock(&f->lock);
> > +
> > +	return err;
> > +}
> > +
> > +static int nxp_fspi_adjust_op_size(struct spi_mem *mem, struct
> > +spi_mem_op *op) {
> > +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
> > +
> > +	if (op->data.dir == SPI_MEM_DATA_OUT) {
> > +		if (op->data.nbytes > f->devtype_data->txfifo)
> > +			op->data.nbytes = f->devtype_data->txfifo;
> > +	} else {
> > +		if (op->data.nbytes > f->devtype_data->ahb_buf_size)
> > +			op->data.nbytes = f->devtype_data->ahb_buf_size;
> > +		else if (op->data.nbytes > (f->devtype_data->rxfifo - 4))
> > +			op->data.nbytes = ALIGN_DOWN(op->data.nbytes, 8);
> 
> You are using the same alignments as in the QSPI driver. So AHB reads will
> happen in portions of ahb_buf_size, but you dont' stick to this when you map the
> memory. See above.

Reason mentioned above.

--
Regards
Yogesh Gaur

> 
> Regards,
> Frieder
[...]
Boris Brezillon Dec. 10, 2018, 10:19 a.m. UTC | #3
On Mon, 10 Dec 2018 09:41:51 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> > > +/* Instead of busy looping invoke readl_poll_timeout functionality.
> > > +*/ static int fspi_readl_poll_tout(struct nxp_fspi *f, void __iomem *base,
> > > +				u32 mask, u32 delay_us,
> > > +				u32 timeout_us, bool condition)
> > > +{
> > > +	u32 reg;
> > > +
> > > +	if (!f->devtype_data->little_endian)
> > > +		mask = (u32)cpu_to_be32(mask);
> > > +
> > > +	if (condition)
> > > +		return readl_poll_timeout(base, reg, (reg & mask),
> > > +					  delay_us, timeout_us);
> > > +	else
> > > +		return readl_poll_timeout(base, reg, !(reg & mask),
> > > +					  delay_us, timeout_us);  
> > 
> > I would rather use a local variable to store the condition:
> > 
> > bool c = condition ? (reg & mask):!(reg & mask);
> >   
> With these type of usage getting below warning messages.
>  
> drivers/spi/spi-nxp-fspi.c: In function ‘fspi_readl_poll_tout.isra.10.constprop’:
> drivers/spi/spi-nxp-fspi.c:446:21: warning: ‘reg’ may be used uninitialized in this function [-Wmaybe-uninitialized]
>   bool cn = c ? (reg & mask) : !(reg & mask);
> 
> If assign value to reg = 0xffffffff then timeout is start getting hit for False case and if assign value 0 then start getting timeout hit for true case.
> 
> I would rather not try to modify this function.

I agree. Let's keep this function readable even if this implies
duplicating a few lines of code.

> 
> > return readl_poll_timeout(base, reg, c, delay_us, timeout_us);
> >   
> > > +}
> > > +
> > > +/*
> > > + * If the slave device content being changed by Write/Erase, need to
> > > + * invalidate the AHB buffer. This can be achieved by doing the reset
> > > + * of controller after setting MCR0[SWRESET] bit.
> > > + */
> > > +static inline void nxp_fspi_invalid(struct nxp_fspi *f) {
> > > +	u32 reg;
> > > +	int ret;
> > > +
> > > +	reg = fspi_readl(f, f->iobase + FSPI_MCR0);
> > > +	fspi_writel(f, reg | FSPI_MCR0_SWRST, f->iobase + FSPI_MCR0);
> > > +
> > > +	/* w1c register, wait unit clear */
> > > +	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
> > > +				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
> > > +	WARN_ON(ret);
> > > +}
> > > +
> > > +static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
> > > +				 const struct spi_mem_op *op)
> > > +{
> > > +	void __iomem *base = f->iobase;
> > > +	u32 lutval[4] = {};
> > > +	int lutidx = 1, i;
> > > +
> > > +	/* cmd */
> > > +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
> > > +			     op->cmd.opcode);
> > > +
> > > +	/* addr bus width */
> > > +	if (op->addr.nbytes) {
> > > +		u32 addrlen = 0;
> > > +
> > > +		switch (op->addr.nbytes) {
> > > +		case 1:
> > > +			addrlen = ADDR8BIT;
> > > +			break;
> > > +		case 2:
> > > +			addrlen = ADDR16BIT;
> > > +			break;
> > > +		case 3:
> > > +			addrlen = ADDR24BIT;
> > > +			break;
> > > +		case 4:
> > > +			addrlen = ADDR32BIT;
> > > +			break;
> > > +		default:
> > > +			dev_err(f->dev, "In-correct address length\n");
> > > +			return;
> > > +		}  
> > 
> > You don't need to validate op->addr.nbytes here, this is already done in
> > nxp_fspi_supports_op().  
> 
> Yes, I need to validate op->addr.nbytes else LUT would going to be programmed for 0 addrlen.
> I have checked this on the target.

Also agree there. Some operations have 0 address bytes. We could also
test addr.buswidth, but I'm fine with the addr.nbytes test too.


> > > +static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device
> > > +*spi) {
> > > +	unsigned long rate = spi->max_speed_hz;
> > > +	int ret;
> > > +	uint64_t size_kb;
> > > +
> > > +	/*
> > > +	 * Return, if previously selected slave device is same as current
> > > +	 * requested slave device.
> > > +	 */
> > > +	if (f->selected == spi->chip_select)
> > > +		return;
> > > +
> > > +	/* Reset FLSHxxCR0 registers */
> > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA1CR0);
> > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA2CR0);
> > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB1CR0);
> > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB2CR0);
> > > +
> > > +	/* Assign controller memory mapped space as size, KBytes, of flash. */
> > > +	size_kb = FSPI_FLSHXCR0_SZ(f->memmap_phy_size);  
> >   
> Above description of this function, explains the reason for using memmap_phy_size.
> This is not the arbitrary size, but the memory mapped size being assigned to the controller.
> 
> > You are still using memory of arbitrary size (memmap_phy_size) for mapping the
> > flash. Why not use the same approach as in the QSPI driver and just map
> > ahb_buf_size until we implement the dirmap API?  
> The approach which being used in QSPI driver didn't work here, I have tried with that.
> In QSPI driver, while preparing LUT we are assigning read/write address in the LUT preparation and have to for some unknown hack have to provide macro for LUT_MODE instead of LUT_ADDR.
> But this thing didn't work for FlexSPI.
> I discussed with HW IP owner and they suggested only to use LUT_ADDR for specifying the address length of the command i.e. 3-byte or 4-byte address command (NOR) or 1-2 byte address command for NAND.

Actually, we would have used a LUT_ADDR too if the QSPI IP was support
ADDR instructions with a number of bytes < 3, but for some unknown
reasons it does not work. 

> 
> Thus, in LUT preparation we have assigned only the base address.
> Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register then for read/write data beyond limit of ahb_buf_size offset I get data corruption.

Why would you do that? We have the ->adjust_op_size() exactly for this
reason, so, if someone tries to do a spi_mem_op with data.nbytes >
ahb_buf_size you should return an error.

> 
> Thus, for generic approach have assigned FSPI_FLSHXXCR0 equal to the memory mapped size to the controller. This would also not going to depend on the number of CS present on the target.

I kind of agree with Frieder on that one, I think it's preferable to
limit the per-read-op size to ahb_buf_size and let the upper layer
split the request in several sub-requests. On the controller side of
things, you just have to have a mapping of ahb_buf_size per-CS. If you
want to further optimize things, implement the dirmap hooks.

> 
> > You are already aligning the AHB reads for this in nxp_fspi_adjust_op_size().
> >   
> Yes, max read data size can be ahb_buf_size. Thus we need to check max read size with ahb_buf_size.

Well, it's never a bad thing to check it twice, just in case the
spi-mem user is misusing the API.

> > > +static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
> > > +				 const struct spi_mem_op *op)
> > > +{
> > > +	void __iomem *base = f->iobase;
> > > +	int i, j, ret;
> > > +	int size, tmp_size, wm_size;
> > > +	u32 data = 0;
> > > +	u32 *txbuf = (u32 *) op->data.buf.out;
> > > +
> > > +	/* clear the TX FIFO. */
> > > +	fspi_writel(f, FSPI_IPTXFCR_CLR, base + FSPI_IPTXFCR);
> > > +
> > > +	/* Default value of water mark level is 8 bytes. */
> > > +	wm_size = 8;
> > > +	size = op->data.nbytes / wm_size;
> > > +	for (i = 0; i < size; i++) {
> > > +		/* Wait for TXFIFO empty */
> > > +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > > +					   FSPI_INTR_IPTXWE, 0,
> > > +					   POLL_TOUT, true);
> > > +		WARN_ON(ret);
> > > +
> > > +		j = 0;
> > > +		tmp_size = wm_size;
> > > +		while (tmp_size > 0) {
> > > +			data = 0;
> > > +			memcpy(&data, txbuf, 4);
> > > +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> > > +			tmp_size -= 4;
> > > +			j++;
> > > +			txbuf += 1;
> > > +		}
> > > +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> > > +	}
> > > +
> > > +	size = op->data.nbytes % wm_size;
> > > +	if (size) {
> > > +		/* Wait for TXFIFO empty */
> > > +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > > +					   FSPI_INTR_IPTXWE, 0,
> > > +					   POLL_TOUT, true);
> > > +		WARN_ON(ret);
> > > +
> > > +		j = 0;
> > > +		tmp_size = 0;
> > > +		while (size > 0) {
> > > +			data = 0;
> > > +			tmp_size = (size < 4) ? size : 4;
> > > +			memcpy(&data, txbuf, tmp_size);
> > > +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> > > +			size -= tmp_size;
> > > +			j++;
> > > +			txbuf += 1;
> > > +		}
> > > +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> > > +	}  
> > 
> > All these nested loops to fill the TX buffer and also the ones below to read the
> > RX buffer look much more complicated than they should really be. Can you try to
> > make this more readable?  
> Yes
> > 
> > Maybe something like this would work:
> > 
> > for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 8); i += 8) {
> > 	/* Wait for TXFIFO empty */
> > 	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > 				   FSPI_INTR_IPTXWE, 0,
> > 				   POLL_TOUT, true);
> > 
> > 	fspi_writel(f, op->data.buf.out + i, base + FSPI_TFDR);
> > 	fspi_writel(f, op->data.buf.out + i + 4, base + FSPI_TFDR + 4);
> > 	fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR); }  
> With this above 2 lines we are hardcoding it for read/write with watermark size as 8 bytes.
> Watermark size can be variable and depends on the value of IPRXFCR/IPTXFCR register with default value as 8 bytes
> Thus, I would still prefer to use the internal for loop instead of 2 fspi_writel(...) for FSPI_TFDR and FSPI_TFDR + 4 register write commands.

Just like you're hardcoding wm_size to 8, so I don't see a difference
here. And I indeed prefer Frieder's version.
Frieder Schrempf Dec. 10, 2018, 10:31 a.m. UTC | #4
Hi Yogesh, Boris,

On 10.12.18 11:19, Boris Brezillon wrote:
> On Mon, 10 Dec 2018 09:41:51 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
>>>> +/* Instead of busy looping invoke readl_poll_timeout functionality.
>>>> +*/ static int fspi_readl_poll_tout(struct nxp_fspi *f, void __iomem *base,
>>>> +				u32 mask, u32 delay_us,
>>>> +				u32 timeout_us, bool condition)
>>>> +{
>>>> +	u32 reg;
>>>> +
>>>> +	if (!f->devtype_data->little_endian)
>>>> +		mask = (u32)cpu_to_be32(mask);
>>>> +
>>>> +	if (condition)
>>>> +		return readl_poll_timeout(base, reg, (reg & mask),
>>>> +					  delay_us, timeout_us);
>>>> +	else
>>>> +		return readl_poll_timeout(base, reg, !(reg & mask),
>>>> +					  delay_us, timeout_us);
>>>
>>> I would rather use a local variable to store the condition:
>>>
>>> bool c = condition ? (reg & mask):!(reg & mask);
>>>    
>> With these type of usage getting below warning messages.
>>   
>> drivers/spi/spi-nxp-fspi.c: In function ‘fspi_readl_poll_tout.isra.10.constprop’:
>> drivers/spi/spi-nxp-fspi.c:446:21: warning: ‘reg’ may be used uninitialized in this function [-Wmaybe-uninitialized]
>>    bool cn = c ? (reg & mask) : !(reg & mask);
>>
>> If assign value to reg = 0xffffffff then timeout is start getting hit for False case and if assign value 0 then start getting timeout hit for true case.
>>
>> I would rather not try to modify this function.
> 
> I agree. Let's keep this function readable even if this implies
> duplicating a few lines of code.

My bad. This doesn't work of course. We need to pass the actual 
expression containing reg to the readl_poll_timeout() macro. So forget 
about my comment.

> 
>>
>>> return readl_poll_timeout(base, reg, c, delay_us, timeout_us);
>>>    
>>>> +}
>>>> +
>>>> +/*
>>>> + * If the slave device content being changed by Write/Erase, need to
>>>> + * invalidate the AHB buffer. This can be achieved by doing the reset
>>>> + * of controller after setting MCR0[SWRESET] bit.
>>>> + */
>>>> +static inline void nxp_fspi_invalid(struct nxp_fspi *f) {
>>>> +	u32 reg;
>>>> +	int ret;
>>>> +
>>>> +	reg = fspi_readl(f, f->iobase + FSPI_MCR0);
>>>> +	fspi_writel(f, reg | FSPI_MCR0_SWRST, f->iobase + FSPI_MCR0);
>>>> +
>>>> +	/* w1c register, wait unit clear */
>>>> +	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
>>>> +				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
>>>> +	WARN_ON(ret);
>>>> +}
>>>> +
>>>> +static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
>>>> +				 const struct spi_mem_op *op)
>>>> +{
>>>> +	void __iomem *base = f->iobase;
>>>> +	u32 lutval[4] = {};
>>>> +	int lutidx = 1, i;
>>>> +
>>>> +	/* cmd */
>>>> +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
>>>> +			     op->cmd.opcode);
>>>> +
>>>> +	/* addr bus width */
>>>> +	if (op->addr.nbytes) {
>>>> +		u32 addrlen = 0;
>>>> +
>>>> +		switch (op->addr.nbytes) {
>>>> +		case 1:
>>>> +			addrlen = ADDR8BIT;
>>>> +			break;
>>>> +		case 2:
>>>> +			addrlen = ADDR16BIT;
>>>> +			break;
>>>> +		case 3:
>>>> +			addrlen = ADDR24BIT;
>>>> +			break;
>>>> +		case 4:
>>>> +			addrlen = ADDR32BIT;
>>>> +			break;
>>>> +		default:
>>>> +			dev_err(f->dev, "In-correct address length\n");
>>>> +			return;
>>>> +		}
>>>
>>> You don't need to validate op->addr.nbytes here, this is already done in
>>> nxp_fspi_supports_op().
>>
>> Yes, I need to validate op->addr.nbytes else LUT would going to be programmed for 0 addrlen.
>> I have checked this on the target.
> 
> Also agree there. Some operations have 0 address bytes. We could also
> test addr.buswidth, but I'm fine with the addr.nbytes test too.

The "if (op->addr.nbytes)" is needed of course, but I think the default 
case in the switch statement (and for other reasons the whole switch 
statement) is not needed and rather a check for op->addr.nbytes > 4 
should be added to nxp_fspi_supports_op(). I wrongly assumed this check 
already exists in nxp_fspi_supports_op().

> 
>>>> +static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device
>>>> +*spi) {
>>>> +	unsigned long rate = spi->max_speed_hz;
>>>> +	int ret;
>>>> +	uint64_t size_kb;
>>>> +
>>>> +	/*
>>>> +	 * Return, if previously selected slave device is same as current
>>>> +	 * requested slave device.
>>>> +	 */
>>>> +	if (f->selected == spi->chip_select)
>>>> +		return;
>>>> +
>>>> +	/* Reset FLSHxxCR0 registers */
>>>> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA1CR0);
>>>> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA2CR0);
>>>> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB1CR0);
>>>> +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB2CR0);
>>>> +
>>>> +	/* Assign controller memory mapped space as size, KBytes, of flash. */
>>>> +	size_kb = FSPI_FLSHXCR0_SZ(f->memmap_phy_size);
>>>    
>> Above description of this function, explains the reason for using memmap_phy_size.
>> This is not the arbitrary size, but the memory mapped size being assigned to the controller.
>>
>>> You are still using memory of arbitrary size (memmap_phy_size) for mapping the
>>> flash. Why not use the same approach as in the QSPI driver and just map
>>> ahb_buf_size until we implement the dirmap API?
>> The approach which being used in QSPI driver didn't work here, I have tried with that.
>> In QSPI driver, while preparing LUT we are assigning read/write address in the LUT preparation and have to for some unknown hack have to provide macro for LUT_MODE instead of LUT_ADDR.
>> But this thing didn't work for FlexSPI.
>> I discussed with HW IP owner and they suggested only to use LUT_ADDR for specifying the address length of the command i.e. 3-byte or 4-byte address command (NOR) or 1-2 byte address command for NAND.
> 
> Actually, we would have used a LUT_ADDR too if the QSPI IP was support
> ADDR instructions with a number of bytes < 3, but for some unknown
> reasons it does not work.
> 
>>
>> Thus, in LUT preparation we have assigned only the base address.
>> Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register then for read/write data beyond limit of ahb_buf_size offset I get data corruption.
> 
> Why would you do that? We have the ->adjust_op_size() exactly for this
> reason, so, if someone tries to do a spi_mem_op with data.nbytes >
> ahb_buf_size you should return an error.
> 
>>
>> Thus, for generic approach have assigned FSPI_FLSHXXCR0 equal to the memory mapped size to the controller. This would also not going to depend on the number of CS present on the target.
> 
> I kind of agree with Frieder on that one, I think it's preferable to
> limit the per-read-op size to ahb_buf_size and let the upper layer
> split the request in several sub-requests. On the controller side of
> things, you just have to have a mapping of ahb_buf_size per-CS. If you
> want to further optimize things, implement the dirmap hooks.
> 
>>
>>> You are already aligning the AHB reads for this in nxp_fspi_adjust_op_size().
>>>    
>> Yes, max read data size can be ahb_buf_size. Thus we need to check max read size with ahb_buf_size.
> 
> Well, it's never a bad thing to check it twice, just in case the
> spi-mem user is misusing the API.
> 
>>>> +static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
>>>> +				 const struct spi_mem_op *op)
>>>> +{
>>>> +	void __iomem *base = f->iobase;
>>>> +	int i, j, ret;
>>>> +	int size, tmp_size, wm_size;
>>>> +	u32 data = 0;
>>>> +	u32 *txbuf = (u32 *) op->data.buf.out;
>>>> +
>>>> +	/* clear the TX FIFO. */
>>>> +	fspi_writel(f, FSPI_IPTXFCR_CLR, base + FSPI_IPTXFCR);
>>>> +
>>>> +	/* Default value of water mark level is 8 bytes. */
>>>> +	wm_size = 8;
>>>> +	size = op->data.nbytes / wm_size;
>>>> +	for (i = 0; i < size; i++) {
>>>> +		/* Wait for TXFIFO empty */
>>>> +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
>>>> +					   FSPI_INTR_IPTXWE, 0,
>>>> +					   POLL_TOUT, true);
>>>> +		WARN_ON(ret);
>>>> +
>>>> +		j = 0;
>>>> +		tmp_size = wm_size;
>>>> +		while (tmp_size > 0) {
>>>> +			data = 0;
>>>> +			memcpy(&data, txbuf, 4);
>>>> +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
>>>> +			tmp_size -= 4;
>>>> +			j++;
>>>> +			txbuf += 1;
>>>> +		}
>>>> +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
>>>> +	}
>>>> +
>>>> +	size = op->data.nbytes % wm_size;
>>>> +	if (size) {
>>>> +		/* Wait for TXFIFO empty */
>>>> +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
>>>> +					   FSPI_INTR_IPTXWE, 0,
>>>> +					   POLL_TOUT, true);
>>>> +		WARN_ON(ret);
>>>> +
>>>> +		j = 0;
>>>> +		tmp_size = 0;
>>>> +		while (size > 0) {
>>>> +			data = 0;
>>>> +			tmp_size = (size < 4) ? size : 4;
>>>> +			memcpy(&data, txbuf, tmp_size);
>>>> +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
>>>> +			size -= tmp_size;
>>>> +			j++;
>>>> +			txbuf += 1;
>>>> +		}
>>>> +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
>>>> +	}
>>>
>>> All these nested loops to fill the TX buffer and also the ones below to read the
>>> RX buffer look much more complicated than they should really be. Can you try to
>>> make this more readable?
>> Yes
>>>
>>> Maybe something like this would work:
>>>
>>> for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 8); i += 8) {
>>> 	/* Wait for TXFIFO empty */
>>> 	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
>>> 				   FSPI_INTR_IPTXWE, 0,
>>> 				   POLL_TOUT, true);
>>>
>>> 	fspi_writel(f, op->data.buf.out + i, base + FSPI_TFDR);
>>> 	fspi_writel(f, op->data.buf.out + i + 4, base + FSPI_TFDR + 4);
>>> 	fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR); }
>> With this above 2 lines we are hardcoding it for read/write with watermark size as 8 bytes.
>> Watermark size can be variable and depends on the value of IPRXFCR/IPTXFCR register with default value as 8 bytes
>> Thus, I would still prefer to use the internal for loop instead of 2 fspi_writel(...) for FSPI_TFDR and FSPI_TFDR + 4 register write commands.
> 
> Just like you're hardcoding wm_size to 8, so I don't see a difference
> here. And I indeed prefer Frieder's version.

Yes, as long as the watermark level is fixed, we don't need the inner loop.
Frieder Schrempf Dec. 10, 2018, 10:35 a.m. UTC | #5
Hi Yogesh,

On 10.12.18 10:41, Yogesh Narayan Gaur wrote:
[...]>>> +
>>> +static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
>>> +				 const struct spi_mem_op *op)
>>> +{
>>> +	void __iomem *base = f->iobase;
>>> +	u32 lutval[4] = {};
>>> +	int lutidx = 1, i;
>>> +
>>> +	/* cmd */
>>> +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
>>> +			     op->cmd.opcode);
>>> +
>>> +	/* addr bus width */

This comment should match the code below. So maybe only "addr" should be 
enough.

>>> +	if (op->addr.nbytes) {
>>> +		u32 addrlen = 0;
>>> +
>>> +		switch (op->addr.nbytes) {
>>> +		case 1:
>>> +			addrlen = ADDR8BIT;
>>> +			break;
>>> +		case 2:
>>> +			addrlen = ADDR16BIT;
>>> +			break;
>>> +		case 3:
>>> +			addrlen = ADDR24BIT;
>>> +			break;
>>> +		case 4:
>>> +			addrlen = ADDR32BIT;
>>> +			break;
>>> +		default:
>>> +			dev_err(f->dev, "In-correct address length\n");
>>> +			return;
>>> +		}
>>
>> You don't need to validate op->addr.nbytes here, this is already done in
>> nxp_fspi_supports_op().
> 
> Yes, I need to validate op->addr.nbytes else LUT would going to be programmed for 0 addrlen.
> I have checked this on the target.
> 
>>
>>> +
>>> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
>>> +					      LUT_PAD(op->addr.buswidth),
>>> +					      addrlen);
>>
>> You can also just remove the whole switch statement above and use this:
>>
>> lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
>> 			      LUT_PAD(op->addr.buswidth),
>> 			      op->addr.nbytes * 8);
>>
> Ok, would include in next version.
> 
>>> +		lutidx++;
>>> +	}
>>> +
>>> +	/* dummy bytes, if needed */
>>> +	if (op->dummy.nbytes) {
>>> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
>>> +		/*
>>> +		 * Due to FlexSPI controller limitation number of PAD for
>> dummy
>>> +		 * buswidth needs to be programmed as equal to data buswidth.
>>> +		 */
>>> +					      LUT_PAD(op->data.buswidth),
>>> +					      op->dummy.nbytes * 8 /
>>> +					      op->dummy.buswidth);
>>> +		lutidx++;
>>> +	}
>>> +
>>> +	/* read/write data bytes */
>>> +	if (op->data.nbytes) {
>>> +		lutval[lutidx / 2] |= LUT_DEF(lutidx,
>>> +					      op->data.dir ==
>> SPI_MEM_DATA_IN ?
>>> +					      LUT_NXP_READ : LUT_NXP_WRITE,
>>> +					      LUT_PAD(op->data.buswidth),
>>> +					      0);
>>> +		lutidx++;
>>> +	}
>>> +
>>> +	/* stop condition. */
>>> +	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
>>> +
>>> +	/* unlock LUT */
>>> +	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
>>> +	fspi_writel(f, FSPI_LCKER_UNLOCK, f->iobase + FSPI_LCKCR);
>>> +
>>> +	/* fill LUT */
>>> +	for (i = 0; i < ARRAY_SIZE(lutval); i++)
>>> +		fspi_writel(f, lutval[i], base + FSPI_LUT_REG(i));
>>> +
>>> +	dev_dbg(f->dev, "CMD[%x] lutval[0:%x \t 1:%x \t 2:%x \t 3:%x]\n",
>>> +		op->cmd.opcode, lutval[0], lutval[1], lutval[2], lutval[3]);
>>> +
>>> +	/* lock LUT */
>>> +	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
>>> +	fspi_writel(f, FSPI_LCKER_LOCK, f->iobase + FSPI_LCKCR); }
[...]
>>> +
>>> +static int nxp_fspi_exec_op(struct spi_mem *mem, const struct
>>> +spi_mem_op *op) {
>>> +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
>>> +	int err = 0;
>>> +
>>> +	mutex_lock(&f->lock);
>>> +
>>> +	/* Wait for controller being ready. */
>>> +	err = fspi_readl_poll_tout(f, f->iobase + FSPI_STS0,
>>> +				   FSPI_STS0_ARB_IDLE, 1, POLL_TOUT, true);
>>> +	WARN_ON(err);
>>> +
>>> +	nxp_fspi_select_mem(f, mem->spi);
>>> +
>>> +	nxp_fspi_prepare_lut(f, op);
>>> +	/*
>>> +	 * If we have large chunks of data, we read them through the AHB bus
>>> +	 * by accessing the mapped memory. In all other cases we use
>>> +	 * IP commands to access the flash.
>>> +	 */
>>> +	if (op->data.nbytes > (f->devtype_data->rxfifo - 4) &&
>>> +	    op->data.dir == SPI_MEM_DATA_IN) {
>>> +		nxp_fspi_read_ahb(f, op);
>>> +	} else {
>>> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
>>> +			nxp_fspi_fill_txfifo(f, op);
>>> +
>>> +		err = nxp_fspi_do_op(f, op);
>>> +
>>> +		/* Invalidate the data in the AHB buffer. */
>>> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
>>> +			nxp_fspi_invalid(f);
>>
>> E.g. in case of an erase operation or a NAND load page operation, the
>> invalidation is not triggered, but flash/buffer contents have changed.
>> So I'm not sure if this is enough...
> Ok, would change this and have invalidate for all operations.

Maybe you can find out the correct way through testing with NOR and NAND.
Boris Brezillon Dec. 10, 2018, 10:36 a.m. UTC | #6
On Mon, 10 Dec 2018 10:31:57 +0000
Schrempf Frieder <frieder.schrempf@kontron.de> wrote:

> >> Yes, I need to validate op->addr.nbytes else LUT would going to be programmed for 0 addrlen.
> >> I have checked this on the target.  
> > 
> > Also agree there. Some operations have 0 address bytes. We could also
> > test addr.buswidth, but I'm fine with the addr.nbytes test too.  
> 
> The "if (op->addr.nbytes)" is needed of course, but I think the default 
> case in the switch statement (and for other reasons the whole switch 
> statement) is not needed and rather a check for op->addr.nbytes > 4 
> should be added to nxp_fspi_supports_op(). I wrongly assumed this check 
> already exists in nxp_fspi_supports_op().

Ok, then this check on the max number of address bytes should indeed be
moved to the supports_op() implementation.
Boris Brezillon Dec. 10, 2018, 10:41 a.m. UTC | #7
On Mon, 10 Dec 2018 10:35:35 +0000
Schrempf Frieder <frieder.schrempf@kontron.de> wrote:

> >>> +
> >>> +static int nxp_fspi_exec_op(struct spi_mem *mem, const struct
> >>> +spi_mem_op *op) {
> >>> +	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
> >>> +	int err = 0;
> >>> +
> >>> +	mutex_lock(&f->lock);
> >>> +
> >>> +	/* Wait for controller being ready. */
> >>> +	err = fspi_readl_poll_tout(f, f->iobase + FSPI_STS0,
> >>> +				   FSPI_STS0_ARB_IDLE, 1, POLL_TOUT, true);
> >>> +	WARN_ON(err);
> >>> +
> >>> +	nxp_fspi_select_mem(f, mem->spi);
> >>> +
> >>> +	nxp_fspi_prepare_lut(f, op);
> >>> +	/*
> >>> +	 * If we have large chunks of data, we read them through the AHB bus
> >>> +	 * by accessing the mapped memory. In all other cases we use
> >>> +	 * IP commands to access the flash.
> >>> +	 */
> >>> +	if (op->data.nbytes > (f->devtype_data->rxfifo - 4) &&
> >>> +	    op->data.dir == SPI_MEM_DATA_IN) {
> >>> +		nxp_fspi_read_ahb(f, op);
> >>> +	} else {
> >>> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> >>> +			nxp_fspi_fill_txfifo(f, op);
> >>> +
> >>> +		err = nxp_fspi_do_op(f, op);
> >>> +
> >>> +		/* Invalidate the data in the AHB buffer. */
> >>> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> >>> +			nxp_fspi_invalid(f);  
> >>
> >> E.g. in case of an erase operation or a NAND load page operation, the
> >> invalidation is not triggered, but flash/buffer contents have changed.
> >> So I'm not sure if this is enough...  
> > Ok, would change this and have invalidate for all operations.  
> 
> Maybe you can find out the correct way through testing with NOR and NAND.

Or just invalidate the buffer every time you're doing a read through the
AHB. This should always work.

I also think we should quickly move to a model where AHB accesses are
reserved for dirmap, and regular spi-mem op are limited to non-ahb
reads.
Yogesh Narayan Gaur Dec. 10, 2018, 10:43 a.m. UTC | #8
Hi Boris, Frieder,

> -----Original Message-----
> From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> Sent: Monday, December 10, 2018 3:49 PM
> To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org; linux-
> spi@vger.kernel.org; devicetree@vger.kernel.org; robh@kernel.org;
> mark.rutland@arm.com; shawnguo@kernel.org; linux-arm-
> kernel@lists.infradead.org; computersforpeace@gmail.com; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI controller
> 
> On Mon, 10 Dec 2018 09:41:51 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
> > > > +/* Instead of busy looping invoke readl_poll_timeout functionality.
> > > > +*/ static int fspi_readl_poll_tout(struct nxp_fspi *f, void __iomem *base,
> > > > +				u32 mask, u32 delay_us,
> > > > +				u32 timeout_us, bool condition) {
> > > > +	u32 reg;
> > > > +
> > > > +	if (!f->devtype_data->little_endian)
> > > > +		mask = (u32)cpu_to_be32(mask);
> > > > +
> > > > +	if (condition)
> > > > +		return readl_poll_timeout(base, reg, (reg & mask),
> > > > +					  delay_us, timeout_us);
> > > > +	else
> > > > +		return readl_poll_timeout(base, reg, !(reg & mask),
> > > > +					  delay_us, timeout_us);
> > >
> > > I would rather use a local variable to store the condition:
> > >
> > > bool c = condition ? (reg & mask):!(reg & mask);
> > >
> > With these type of usage getting below warning messages.
> >
> > drivers/spi/spi-nxp-fspi.c: In function ‘fspi_readl_poll_tout.isra.10.constprop’:
> > drivers/spi/spi-nxp-fspi.c:446:21: warning: ‘reg’ may be used uninitialized in
> this function [-Wmaybe-uninitialized]
> >   bool cn = c ? (reg & mask) : !(reg & mask);
> >
> > If assign value to reg = 0xffffffff then timeout is start getting hit for False case
> and if assign value 0 then start getting timeout hit for true case.
> >
> > I would rather not try to modify this function.
> 
> I agree. Let's keep this function readable even if this implies duplicating a few
> lines of code.
> 
> >
> > > return readl_poll_timeout(base, reg, c, delay_us, timeout_us);
> > >
> > > > +}
> > > > +
> > > > +/*
> > > > + * If the slave device content being changed by Write/Erase, need
> > > > +to
> > > > + * invalidate the AHB buffer. This can be achieved by doing the
> > > > +reset
> > > > + * of controller after setting MCR0[SWRESET] bit.
> > > > + */
> > > > +static inline void nxp_fspi_invalid(struct nxp_fspi *f) {
> > > > +	u32 reg;
> > > > +	int ret;
> > > > +
> > > > +	reg = fspi_readl(f, f->iobase + FSPI_MCR0);
> > > > +	fspi_writel(f, reg | FSPI_MCR0_SWRST, f->iobase + FSPI_MCR0);
> > > > +
> > > > +	/* w1c register, wait unit clear */
> > > > +	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
> > > > +				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
> > > > +	WARN_ON(ret);
> > > > +}
> > > > +
> > > > +static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
> > > > +				 const struct spi_mem_op *op) {
> > > > +	void __iomem *base = f->iobase;
> > > > +	u32 lutval[4] = {};
> > > > +	int lutidx = 1, i;
> > > > +
> > > > +	/* cmd */
> > > > +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
> > > > +			     op->cmd.opcode);
> > > > +
> > > > +	/* addr bus width */
> > > > +	if (op->addr.nbytes) {
> > > > +		u32 addrlen = 0;
> > > > +
> > > > +		switch (op->addr.nbytes) {
> > > > +		case 1:
> > > > +			addrlen = ADDR8BIT;
> > > > +			break;
> > > > +		case 2:
> > > > +			addrlen = ADDR16BIT;
> > > > +			break;
> > > > +		case 3:
> > > > +			addrlen = ADDR24BIT;
> > > > +			break;
> > > > +		case 4:
> > > > +			addrlen = ADDR32BIT;
> > > > +			break;
> > > > +		default:
> > > > +			dev_err(f->dev, "In-correct address length\n");
> > > > +			return;
> > > > +		}
> > >
> > > You don't need to validate op->addr.nbytes here, this is already
> > > done in nxp_fspi_supports_op().
> >
> > Yes, I need to validate op->addr.nbytes else LUT would going to be
> programmed for 0 addrlen.
> > I have checked this on the target.
> 
> Also agree there. Some operations have 0 address bytes. We could also test
> addr.buswidth, but I'm fine with the addr.nbytes test too.
> 
> 
> > > > +static void nxp_fspi_select_mem(struct nxp_fspi *f, struct
> > > > +spi_device
> > > > +*spi) {
> > > > +	unsigned long rate = spi->max_speed_hz;
> > > > +	int ret;
> > > > +	uint64_t size_kb;
> > > > +
> > > > +	/*
> > > > +	 * Return, if previously selected slave device is same as current
> > > > +	 * requested slave device.
> > > > +	 */
> > > > +	if (f->selected == spi->chip_select)
> > > > +		return;
> > > > +
> > > > +	/* Reset FLSHxxCR0 registers */
> > > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA1CR0);
> > > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHA2CR0);
> > > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB1CR0);
> > > > +	fspi_writel(f, 0, f->iobase + FSPI_FLSHB2CR0);
> > > > +
> > > > +	/* Assign controller memory mapped space as size, KBytes, of flash. */
> > > > +	size_kb = FSPI_FLSHXCR0_SZ(f->memmap_phy_size);
> > >
> > Above description of this function, explains the reason for using
> memmap_phy_size.
> > This is not the arbitrary size, but the memory mapped size being assigned to
> the controller.
> >
> > > You are still using memory of arbitrary size (memmap_phy_size) for
> > > mapping the flash. Why not use the same approach as in the QSPI
> > > driver and just map ahb_buf_size until we implement the dirmap API?
> > The approach which being used in QSPI driver didn't work here, I have tried
> with that.
> > In QSPI driver, while preparing LUT we are assigning read/write address in the
> LUT preparation and have to for some unknown hack have to provide macro for
> LUT_MODE instead of LUT_ADDR.
> > But this thing didn't work for FlexSPI.
> > I discussed with HW IP owner and they suggested only to use LUT_ADDR for
> specifying the address length of the command i.e. 3-byte or 4-byte address
> command (NOR) or 1-2 byte address command for NAND.
> 
> Actually, we would have used a LUT_ADDR too if the QSPI IP was support ADDR
> instructions with a number of bytes < 3, but for some unknown reasons it does
> not work.
> 
> >
> > Thus, in LUT preparation we have assigned only the base address.
> > Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register then for
> read/write data beyond limit of ahb_buf_size offset I get data corruption.
> 
> Why would you do that? We have the ->adjust_op_size() exactly for this reason,
> so, if someone tries to do a spi_mem_op with data.nbytes > ahb_buf_size you
> should return an error.
> 
Let me explain my implementation with example. If I have to write data of size 0x100 bytes at offset 0x1200 for CS1, I would program as below:
In func nxp_fspi_select_mem(), would set value of controller address space size, memmap_phy_size, to FSPI_FLSHA2CR0 and rest all FSPI_FLSHXXCR0 as 0. 
Value of memmap_phy_size is 0x10000000 i.e. 256 MB for my LX2160ARDB target.
Then in nxp_fspi_prepare_lut(), I would prepare LUT ADDR with address length requirement 3/4 byte for NOR or 1/2/3/4 bytes for NAND flash.
Also for LUT_NXP_WRITE would program data bytes as 0.

Then inside func nxp_fspi_do_op(), set register FSPI_IPCR0 as the address offset i.e. 0x1200 and in register FSPI_IPCR1 program the data size to write i.e. 0x100

If, as suggested if I tries to mark value of register FSPI_FLSHA2CR0 equal to ahb_buf_size (0x800), then access for address 0x1200 gives me wrong data. This is because as per the controller specification access to flash connected at CS1 can be performed under range of FSPI_ FLSHA1CR0 and FSPI_ FLSHA2CR0.
So either FSPI_ FLSHA2CR0 should have the value of the actual connected flash size but we don't have mechanism to know the flash size in current implementation.
Thus instead of using some other arbitrary value, I have used the full size being allocated to the FlexSPI controller.

> >
> > Thus, for generic approach have assigned FSPI_FLSHXXCR0 equal to the
> memory mapped size to the controller. This would also not going to depend on
> the number of CS present on the target.
> 
> I kind of agree with Frieder on that one, I think it's preferable to limit the per-
> read-op size to ahb_buf_size and let the upper layer split the request in several
> sub-requests. On the controller side of things, you just have to have a mapping
> of ahb_buf_size per-CS. If you want to further optimize things, implement the
> dirmap hooks.
> 
> >
> > > You are already aligning the AHB reads for this in nxp_fspi_adjust_op_size().
> > >
> > Yes, max read data size can be ahb_buf_size. Thus we need to check max read
> size with ahb_buf_size.
> 
> Well, it's never a bad thing to check it twice, just in case the spi-mem user is
> misusing the API.
> 
> > > > +static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
> > > > +				 const struct spi_mem_op *op) {
> > > > +	void __iomem *base = f->iobase;
> > > > +	int i, j, ret;
> > > > +	int size, tmp_size, wm_size;
> > > > +	u32 data = 0;
> > > > +	u32 *txbuf = (u32 *) op->data.buf.out;
> > > > +
> > > > +	/* clear the TX FIFO. */
> > > > +	fspi_writel(f, FSPI_IPTXFCR_CLR, base + FSPI_IPTXFCR);
> > > > +
> > > > +	/* Default value of water mark level is 8 bytes. */
> > > > +	wm_size = 8;
> > > > +	size = op->data.nbytes / wm_size;
> > > > +	for (i = 0; i < size; i++) {
> > > > +		/* Wait for TXFIFO empty */
> > > > +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > > > +					   FSPI_INTR_IPTXWE, 0,
> > > > +					   POLL_TOUT, true);
> > > > +		WARN_ON(ret);
> > > > +
> > > > +		j = 0;
> > > > +		tmp_size = wm_size;
> > > > +		while (tmp_size > 0) {
> > > > +			data = 0;
> > > > +			memcpy(&data, txbuf, 4);
> > > > +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> > > > +			tmp_size -= 4;
> > > > +			j++;
> > > > +			txbuf += 1;
> > > > +		}
> > > > +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> > > > +	}
> > > > +
> > > > +	size = op->data.nbytes % wm_size;
> > > > +	if (size) {
> > > > +		/* Wait for TXFIFO empty */
> > > > +		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > > > +					   FSPI_INTR_IPTXWE, 0,
> > > > +					   POLL_TOUT, true);
> > > > +		WARN_ON(ret);
> > > > +
> > > > +		j = 0;
> > > > +		tmp_size = 0;
> > > > +		while (size > 0) {
> > > > +			data = 0;
> > > > +			tmp_size = (size < 4) ? size : 4;
> > > > +			memcpy(&data, txbuf, tmp_size);
> > > > +			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
> > > > +			size -= tmp_size;
> > > > +			j++;
> > > > +			txbuf += 1;
> > > > +		}
> > > > +		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
> > > > +	}
> > >
> > > All these nested loops to fill the TX buffer and also the ones below
> > > to read the RX buffer look much more complicated than they should
> > > really be. Can you try to make this more readable?
> > Yes
> > >
> > > Maybe something like this would work:
> > >
> > > for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 8); i += 8) {
> > > 	/* Wait for TXFIFO empty */
> > > 	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
> > > 				   FSPI_INTR_IPTXWE, 0,
> > > 				   POLL_TOUT, true);
> > >
> > > 	fspi_writel(f, op->data.buf.out + i, base + FSPI_TFDR);
> > > 	fspi_writel(f, op->data.buf.out + i + 4, base + FSPI_TFDR + 4);
> > > 	fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR); }
> > With this above 2 lines we are hardcoding it for read/write with watermark
> size as 8 bytes.
> > Watermark size can be variable and depends on the value of
> > IPRXFCR/IPTXFCR register with default value as 8 bytes Thus, I would still
> prefer to use the internal for loop instead of 2 fspi_writel(...) for FSPI_TFDR and
> FSPI_TFDR + 4 register write commands.
> 
> Just like you're hardcoding wm_size to 8, so I don't see a difference here. And I
> indeed prefer Frieder's version.

Ok. But, instead of hardcoding and doing fspi_writel() twice, I have modified this as below in my upcoming next version
                for (tmp = wm_size, j = 0; tmp > 0; tmp -= 4, j++)
                        fspi_writel(f, *txbuf++, base + FSPI_TFDR + j * 4);
This would going to give us freedom for future use if watermark size is being used as 16 or 24 (max 64) instead of its current default value of 8.

--
Regards
Yogesh Gaur
Boris Brezillon Dec. 10, 2018, 10:50 a.m. UTC | #9
On Mon, 10 Dec 2018 10:43:56 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> > > Thus, in LUT preparation we have assigned only the base address.
> > > Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register then for  
> > read/write data beyond limit of ahb_buf_size offset I get data corruption.
> > 
> > Why would you do that? We have the ->adjust_op_size() exactly for this reason,
> > so, if someone tries to do a spi_mem_op with data.nbytes > ahb_buf_size you
> > should return an error.
> >   
> Let me explain my implementation with example. If I have to write data of size 0x100 bytes at offset 0x1200 for CS1, I would program as below:
> In func nxp_fspi_select_mem(), would set value of controller address space size, memmap_phy_size, to FSPI_FLSHA2CR0 and rest all FSPI_FLSHXXCR0 as 0. 
> Value of memmap_phy_size is 0x10000000 i.e. 256 MB for my LX2160ARDB target.
> Then in nxp_fspi_prepare_lut(), I would prepare LUT ADDR with address length requirement 3/4 byte for NOR or 1/2/3/4 bytes for NAND flash.
> Also for LUT_NXP_WRITE would program data bytes as 0.
> 
> Then inside func nxp_fspi_do_op(), set register FSPI_IPCR0 as the address offset i.e. 0x1200 and in register FSPI_IPCR1 program the data size to write i.e. 0x100
> 
> If, as suggested if I tries to mark value of register FSPI_FLSHA2CR0 equal to ahb_buf_size (0x800), then access for address 0x1200 gives me wrong data. This is because as per the controller specification access to flash connected at CS1 can be performed under range of FSPI_ FLSHA1CR0 and FSPI_ FLSHA2CR0.

Don't you have a way to set an offset to apply to the address accessed
through the AHB? And if you don't, how will it work if your mapping
is smaller than the flash size?
Yogesh Narayan Gaur Dec. 10, 2018, 10:59 a.m. UTC | #10
Hi Boris,

> -----Original Message-----
> From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> Sent: Monday, December 10, 2018 4:20 PM
> To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org; linux-
> spi@vger.kernel.org; devicetree@vger.kernel.org; robh@kernel.org;
> mark.rutland@arm.com; shawnguo@kernel.org; linux-arm-
> kernel@lists.infradead.org; computersforpeace@gmail.com; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI controller
> 
> On Mon, 10 Dec 2018 10:43:56 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
> > > > Thus, in LUT preparation we have assigned only the base address.
> > > > Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register
> > > > then for
> > > read/write data beyond limit of ahb_buf_size offset I get data corruption.
> > >
> > > Why would you do that? We have the ->adjust_op_size() exactly for
> > > this reason, so, if someone tries to do a spi_mem_op with
> > > data.nbytes > ahb_buf_size you should return an error.
> > >
> > Let me explain my implementation with example. If I have to write data of size
> 0x100 bytes at offset 0x1200 for CS1, I would program as below:
> > In func nxp_fspi_select_mem(), would set value of controller address space
> size, memmap_phy_size, to FSPI_FLSHA2CR0 and rest all FSPI_FLSHXXCR0 as 0.
> > Value of memmap_phy_size is 0x10000000 i.e. 256 MB for my LX2160ARDB
> target.
> > Then in nxp_fspi_prepare_lut(), I would prepare LUT ADDR with address length
> requirement 3/4 byte for NOR or 1/2/3/4 bytes for NAND flash.
> > Also for LUT_NXP_WRITE would program data bytes as 0.
> >
> > Then inside func nxp_fspi_do_op(), set register FSPI_IPCR0 as the
> > address offset i.e. 0x1200 and in register FSPI_IPCR1 program the data
> > size to write i.e. 0x100
> >
> > If, as suggested if I tries to mark value of register FSPI_FLSHA2CR0 equal to
> ahb_buf_size (0x800), then access for address 0x1200 gives me wrong data. This
> is because as per the controller specification access to flash connected at CS1
> can be performed under range of FSPI_ FLSHA1CR0 and FSPI_ FLSHA2CR0.
> 
> Don't you have a way to set an offset to apply to the address accessed through
> the AHB? And if you don't, how will it work if your mapping is smaller than the
> flash size?

Write operations are triggered using IP commands instead of AHB command.
For Read AHB command is used and in this we are adding the offset when performing memcpy_fromIO operation
      memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len);

AHB/IP operations are independent of the way how CS got selected. CS selection depends, e.g. CS1 on the value of register FSPI_FLSHA1CR0 and FSPI_FLSHA2CR0.

Mapping can never going to be smaller than the connected flash size as per discussion with the Board design team and if it's possible by user manually changes the non-soldered part then flash area beyond complete mapping is not accessible.
On LX2160ARDB, with mapping of 256MB, for now we are having 4 flash devices connected with size as 64 MB. If user wants he can have only one single flash with flash size of 256MB.

--
Regards
Yogesh Gaur
Boris Brezillon Dec. 10, 2018, 11:03 a.m. UTC | #11
On Mon, 10 Dec 2018 10:59:54 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Boris,
> 
> > -----Original Message-----
> > From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> > Sent: Monday, December 10, 2018 4:20 PM
> > To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> > Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> > mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org; linux-
> > spi@vger.kernel.org; devicetree@vger.kernel.org; robh@kernel.org;
> > mark.rutland@arm.com; shawnguo@kernel.org; linux-arm-
> > kernel@lists.infradead.org; computersforpeace@gmail.com; linux-
> > kernel@vger.kernel.org
> > Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI controller
> > 
> > On Mon, 10 Dec 2018 10:43:56 +0000
> > Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> >   
> > > > > Thus, in LUT preparation we have assigned only the base address.
> > > > > Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register
> > > > > then for  
> > > > read/write data beyond limit of ahb_buf_size offset I get data corruption.
> > > >
> > > > Why would you do that? We have the ->adjust_op_size() exactly for
> > > > this reason, so, if someone tries to do a spi_mem_op with
> > > > data.nbytes > ahb_buf_size you should return an error.
> > > >  
> > > Let me explain my implementation with example. If I have to write data of size  
> > 0x100 bytes at offset 0x1200 for CS1, I would program as below:  
> > > In func nxp_fspi_select_mem(), would set value of controller address space  
> > size, memmap_phy_size, to FSPI_FLSHA2CR0 and rest all FSPI_FLSHXXCR0 as 0.  
> > > Value of memmap_phy_size is 0x10000000 i.e. 256 MB for my LX2160ARDB  
> > target.  
> > > Then in nxp_fspi_prepare_lut(), I would prepare LUT ADDR with address length  
> > requirement 3/4 byte for NOR or 1/2/3/4 bytes for NAND flash.  
> > > Also for LUT_NXP_WRITE would program data bytes as 0.
> > >
> > > Then inside func nxp_fspi_do_op(), set register FSPI_IPCR0 as the
> > > address offset i.e. 0x1200 and in register FSPI_IPCR1 program the data
> > > size to write i.e. 0x100
> > >
> > > If, as suggested if I tries to mark value of register FSPI_FLSHA2CR0 equal to  
> > ahb_buf_size (0x800), then access for address 0x1200 gives me wrong data. This
> > is because as per the controller specification access to flash connected at CS1
> > can be performed under range of FSPI_ FLSHA1CR0 and FSPI_ FLSHA2CR0.
> > 
> > Don't you have a way to set an offset to apply to the address accessed through
> > the AHB? And if you don't, how will it work if your mapping is smaller than the
> > flash size?  
> 
> Write operations are triggered using IP commands instead of AHB command.
> For Read AHB command is used and in this we are adding the offset when performing memcpy_fromIO operation
>       memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len);
> 
> AHB/IP operations are independent of the way how CS got selected. CS selection depends, e.g. CS1 on the value of register FSPI_FLSHA1CR0 and FSPI_FLSHA2CR0.
> 
> Mapping can never going to be smaller than the connected flash size as per discussion with the Board design team and if it's possible by user manually changes the non-soldered part then flash area beyond complete mapping is not accessible.

How unfortunate is that, especially when all that was required was an
extra reg to specify a "flash_offset" to apply to the address passed by
the AHB logic.
Boris Brezillon Dec. 10, 2018, 11:09 a.m. UTC | #12
On Mon, 10 Dec 2018 10:59:54 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Boris,
> 
> > -----Original Message-----
> > From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> > Sent: Monday, December 10, 2018 4:20 PM
> > To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> > Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> > mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org; linux-
> > spi@vger.kernel.org; devicetree@vger.kernel.org; robh@kernel.org;
> > mark.rutland@arm.com; shawnguo@kernel.org; linux-arm-
> > kernel@lists.infradead.org; computersforpeace@gmail.com; linux-
> > kernel@vger.kernel.org
> > Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI controller
> > 
> > On Mon, 10 Dec 2018 10:43:56 +0000
> > Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> >   
> > > > > Thus, in LUT preparation we have assigned only the base address.
> > > > > Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register
> > > > > then for  
> > > > read/write data beyond limit of ahb_buf_size offset I get data corruption.
> > > >
> > > > Why would you do that? We have the ->adjust_op_size() exactly for
> > > > this reason, so, if someone tries to do a spi_mem_op with
> > > > data.nbytes > ahb_buf_size you should return an error.
> > > >  
> > > Let me explain my implementation with example. If I have to write data of size  
> > 0x100 bytes at offset 0x1200 for CS1, I would program as below:  
> > > In func nxp_fspi_select_mem(), would set value of controller address space  
> > size, memmap_phy_size, to FSPI_FLSHA2CR0 and rest all FSPI_FLSHXXCR0 as 0.  
> > > Value of memmap_phy_size is 0x10000000 i.e. 256 MB for my LX2160ARDB  
> > target.  
> > > Then in nxp_fspi_prepare_lut(), I would prepare LUT ADDR with address length  
> > requirement 3/4 byte for NOR or 1/2/3/4 bytes for NAND flash.  
> > > Also for LUT_NXP_WRITE would program data bytes as 0.
> > >
> > > Then inside func nxp_fspi_do_op(), set register FSPI_IPCR0 as the
> > > address offset i.e. 0x1200 and in register FSPI_IPCR1 program the data
> > > size to write i.e. 0x100
> > >
> > > If, as suggested if I tries to mark value of register FSPI_FLSHA2CR0 equal to  
> > ahb_buf_size (0x800), then access for address 0x1200 gives me wrong data. This
> > is because as per the controller specification access to flash connected at CS1
> > can be performed under range of FSPI_ FLSHA1CR0 and FSPI_ FLSHA2CR0.
> > 
> > Don't you have a way to set an offset to apply to the address accessed through
> > the AHB? And if you don't, how will it work if your mapping is smaller than the
> > flash size?  
> 
> Write operations are triggered using IP commands instead of AHB command.
> For Read AHB command is used and in this we are adding the offset when performing memcpy_fromIO operation
>       memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len);
> 
> AHB/IP operations are independent of the way how CS got selected. CS selection depends, e.g. CS1 on the value of register FSPI_FLSHA1CR0 and FSPI_FLSHA2CR0.
> 
> Mapping can never going to be smaller than the connected flash size as per discussion with the Board design team and if it's possible by user manually changes the non-soldered part then flash area beyond complete mapping is not accessible.
> On LX2160ARDB, with mapping of 256MB, for now we are having 4 flash devices connected with size as 64 MB. If user wants he can have only one single flash with flash size of 256MB.

Given that the dirmap interface has now been merged and the MTD side of
things is soon to be merged, I'd recommend you to implement it in your
v6 and only use non-AHB accesses for the ->exec_op() implementation.
Yogesh Narayan Gaur Dec. 10, 2018, 11:25 a.m. UTC | #13
Hi Boris,

> -----Original Message-----
> From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> Sent: Monday, December 10, 2018 4:39 PM
> To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org; linux-
> spi@vger.kernel.org; devicetree@vger.kernel.org; robh@kernel.org;
> mark.rutland@arm.com; shawnguo@kernel.org; linux-arm-
> kernel@lists.infradead.org; computersforpeace@gmail.com; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI controller
> 
> On Mon, 10 Dec 2018 10:59:54 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
> > Hi Boris,
> >
> > > -----Original Message-----
> > > From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> > > Sent: Monday, December 10, 2018 4:20 PM
> > > To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> > > Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> > > mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org;
> > > linux- spi@vger.kernel.org; devicetree@vger.kernel.org;
> > > robh@kernel.org; mark.rutland@arm.com; shawnguo@kernel.org;
> > > linux-arm- kernel@lists.infradead.org; computersforpeace@gmail.com;
> > > linux- kernel@vger.kernel.org
> > > Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI
> > > controller
> > >
> > > On Mon, 10 Dec 2018 10:43:56 +0000
> > > Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> > >
> > > > > > Thus, in LUT preparation we have assigned only the base address.
> > > > > > Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register
> > > > > > then for
> > > > > read/write data beyond limit of ahb_buf_size offset I get data corruption.
> > > > >
> > > > > Why would you do that? We have the ->adjust_op_size() exactly
> > > > > for this reason, so, if someone tries to do a spi_mem_op with
> > > > > data.nbytes > ahb_buf_size you should return an error.
> > > > >
> > > > Let me explain my implementation with example. If I have to write
> > > > data of size
> > > 0x100 bytes at offset 0x1200 for CS1, I would program as below:
> > > > In func nxp_fspi_select_mem(), would set value of controller
> > > > address space
> > > size, memmap_phy_size, to FSPI_FLSHA2CR0 and rest all FSPI_FLSHXXCR0 as
> 0.
> > > > Value of memmap_phy_size is 0x10000000 i.e. 256 MB for my
> > > > LX2160ARDB
> > > target.
> > > > Then in nxp_fspi_prepare_lut(), I would prepare LUT ADDR with
> > > > address length
> > > requirement 3/4 byte for NOR or 1/2/3/4 bytes for NAND flash.
> > > > Also for LUT_NXP_WRITE would program data bytes as 0.
> > > >
> > > > Then inside func nxp_fspi_do_op(), set register FSPI_IPCR0 as the
> > > > address offset i.e. 0x1200 and in register FSPI_IPCR1 program the
> > > > data size to write i.e. 0x100
> > > >
> > > > If, as suggested if I tries to mark value of register
> > > > FSPI_FLSHA2CR0 equal to
> > > ahb_buf_size (0x800), then access for address 0x1200 gives me wrong
> > > data. This is because as per the controller specification access to
> > > flash connected at CS1 can be performed under range of FSPI_ FLSHA1CR0
> and FSPI_ FLSHA2CR0.
> > >
> > > Don't you have a way to set an offset to apply to the address
> > > accessed through the AHB? And if you don't, how will it work if your
> > > mapping is smaller than the flash size?
> >
> > Write operations are triggered using IP commands instead of AHB command.
> > For Read AHB command is used and in this we are adding the offset when
> performing memcpy_fromIO operation
> >       memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val),
> > len);
> >
> > AHB/IP operations are independent of the way how CS got selected. CS
> selection depends, e.g. CS1 on the value of register FSPI_FLSHA1CR0 and
> FSPI_FLSHA2CR0.
> >
> > Mapping can never going to be smaller than the connected flash size as per
> discussion with the Board design team and if it's possible by user manually
> changes the non-soldered part then flash area beyond complete mapping is not
> accessible.
> > On LX2160ARDB, with mapping of 256MB, for now we are having 4 flash
> devices connected with size as 64 MB. If user wants he can have only one single
> flash with flash size of 256MB.
> 
> Given that the dirmap interface has now been merged and the MTD side of
> things is soon to be merged, I'd recommend you to implement it in your
> v6 and only use non-AHB accesses for the ->exec_op() implementation.

This would going to be performance hit if I would use non-AHB accesses for ->exec_op().
In read in v5 I am using AHB mode for read if read data size is greater than rxfifo size and if its less than rxfifo then use IP mode for read.

--
Regards
Yogesh Gaur
Boris Brezillon Dec. 10, 2018, 12:11 p.m. UTC | #14
On Mon, 10 Dec 2018 11:25:55 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Boris,
> 
> > -----Original Message-----
> > From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> > Sent: Monday, December 10, 2018 4:39 PM
> > To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> > Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> > mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org; linux-
> > spi@vger.kernel.org; devicetree@vger.kernel.org; robh@kernel.org;
> > mark.rutland@arm.com; shawnguo@kernel.org; linux-arm-
> > kernel@lists.infradead.org; computersforpeace@gmail.com; linux-
> > kernel@vger.kernel.org
> > Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI controller
> > 
> > On Mon, 10 Dec 2018 10:59:54 +0000
> > Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> >   
> > > Hi Boris,
> > >  
> > > > -----Original Message-----
> > > > From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> > > > Sent: Monday, December 10, 2018 4:20 PM
> > > > To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
> > > > Cc: Schrempf Frieder <frieder.schrempf@kontron.de>; linux-
> > > > mtd@lists.infradead.org; marek.vasut@gmail.com; broonie@kernel.org;
> > > > linux- spi@vger.kernel.org; devicetree@vger.kernel.org;
> > > > robh@kernel.org; mark.rutland@arm.com; shawnguo@kernel.org;
> > > > linux-arm- kernel@lists.infradead.org; computersforpeace@gmail.com;
> > > > linux- kernel@vger.kernel.org
> > > > Subject: Re: [PATCH v5 1/5] spi: spi-mem: Add driver for NXP FlexSPI
> > > > controller
> > > >
> > > > On Mon, 10 Dec 2018 10:43:56 +0000
> > > > Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> > > >  
> > > > > > > Thus, in LUT preparation we have assigned only the base address.
> > > > > > > Now if I have assigned ahb_buf_size to FSPI_FLSHXXCR0 register
> > > > > > > then for  
> > > > > > read/write data beyond limit of ahb_buf_size offset I get data corruption.
> > > > > >
> > > > > > Why would you do that? We have the ->adjust_op_size() exactly
> > > > > > for this reason, so, if someone tries to do a spi_mem_op with
> > > > > > data.nbytes > ahb_buf_size you should return an error.
> > > > > >  
> > > > > Let me explain my implementation with example. If I have to write
> > > > > data of size  
> > > > 0x100 bytes at offset 0x1200 for CS1, I would program as below:  
> > > > > In func nxp_fspi_select_mem(), would set value of controller
> > > > > address space  
> > > > size, memmap_phy_size, to FSPI_FLSHA2CR0 and rest all FSPI_FLSHXXCR0 as  
> > 0.  
> > > > > Value of memmap_phy_size is 0x10000000 i.e. 256 MB for my
> > > > > LX2160ARDB  
> > > > target.  
> > > > > Then in nxp_fspi_prepare_lut(), I would prepare LUT ADDR with
> > > > > address length  
> > > > requirement 3/4 byte for NOR or 1/2/3/4 bytes for NAND flash.  
> > > > > Also for LUT_NXP_WRITE would program data bytes as 0.
> > > > >
> > > > > Then inside func nxp_fspi_do_op(), set register FSPI_IPCR0 as the
> > > > > address offset i.e. 0x1200 and in register FSPI_IPCR1 program the
> > > > > data size to write i.e. 0x100
> > > > >
> > > > > If, as suggested if I tries to mark value of register
> > > > > FSPI_FLSHA2CR0 equal to  
> > > > ahb_buf_size (0x800), then access for address 0x1200 gives me wrong
> > > > data. This is because as per the controller specification access to
> > > > flash connected at CS1 can be performed under range of FSPI_ FLSHA1CR0  
> > and FSPI_ FLSHA2CR0.  
> > > >
> > > > Don't you have a way to set an offset to apply to the address
> > > > accessed through the AHB? And if you don't, how will it work if your
> > > > mapping is smaller than the flash size?  
> > >
> > > Write operations are triggered using IP commands instead of AHB command.
> > > For Read AHB command is used and in this we are adding the offset when  
> > performing memcpy_fromIO operation  
> > >       memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val),
> > > len);
> > >
> > > AHB/IP operations are independent of the way how CS got selected. CS  
> > selection depends, e.g. CS1 on the value of register FSPI_FLSHA1CR0 and
> > FSPI_FLSHA2CR0.  
> > >
> > > Mapping can never going to be smaller than the connected flash size as per  
> > discussion with the Board design team and if it's possible by user manually
> > changes the non-soldered part then flash area beyond complete mapping is not
> > accessible.  
> > > On LX2160ARDB, with mapping of 256MB, for now we are having 4 flash  
> > devices connected with size as 64 MB. If user wants he can have only one single
> > flash with flash size of 256MB.
> > 
> > Given that the dirmap interface has now been merged and the MTD side of
> > things is soon to be merged, I'd recommend you to implement it in your
> > v6 and only use non-AHB accesses for the ->exec_op() implementation.  
> 
> This would going to be performance hit if I would use non-AHB accesses for ->exec_op().

Not if you implement the dirmap hooks.
diff mbox series

Patch

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 7d3a5c9..36630a1 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -259,6 +259,16 @@  config SPI_FSL_LPSPI
 	help
 	  This enables Freescale i.MX LPSPI controllers in master mode.
 
+config SPI_NXP_FLEXSPI
+	tristate "NXP Flex SPI controller"
+	depends on ARCH_LAYERSCAPE || HAS_IOMEM
+	help
+	  This enables support for the Flex SPI controller in master mode.
+	  Up to four slave devices can be connected on two buses with two
+	  chipselects each.
+	  This controller does not support generic SPI messages and only
+	  supports the high-level SPI memory interface.
+
 config SPI_GPIO
 	tristate "GPIO-based bitbanging SPI Master"
 	depends on GPIOLIB || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 3575205..55fec5c 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -60,6 +60,7 @@  obj-$(CONFIG_SPI_MPC52xx)		+= spi-mpc52xx.o
 obj-$(CONFIG_SPI_MT65XX)                += spi-mt65xx.o
 obj-$(CONFIG_SPI_MXS)			+= spi-mxs.o
 obj-$(CONFIG_SPI_NUC900)		+= spi-nuc900.o
+obj-$(CONFIG_SPI_NXP_FLEXSPI)		+= spi-nxp-fspi.o
 obj-$(CONFIG_SPI_OC_TINY)		+= spi-oc-tiny.o
 spi-octeon-objs				:= spi-cavium.o spi-cavium-octeon.o
 obj-$(CONFIG_SPI_OCTEON)		+= spi-octeon.o
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
new file mode 100644
index 0000000..a35013b
--- /dev/null
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -0,0 +1,1145 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * NXP FlexSPI(FSPI) controller driver.
+ *
+ * Copyright 2018 NXP.
+ *
+ * FlexSPI is a flexsible SPI host controller which supports two SPI
+ * channels and up to 4 external devices. Each channel supports
+ * Single/Dual/Quad/Octal mode data transfer (1/2/4/8 bidirectional
+ * data lines).
+ *
+ * FlexSPI controller is driven by the LUT(Look-up Table) registers
+ * LUT registers are a look-up-table for sequences of instructions.
+ * A valid sequence consists of four LUT registers.
+ * Maximum 32 LUT sequences can be programmed simultaneously.
+ *
+ * LUTs are being created at run-time based on the commands passed
+ * from the spi-mem framework, thus using single LUT index.
+ *
+ * Software triggered Flash read/write access by IP Bus.
+ *
+ * Memory mapped read access by AHB Bus.
+ *
+ * Based on SPI MEM interface and spi-fsl-qspi.c driver.
+ *
+ * Author:
+ *     Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
+ *     Boris Brezillion <boris.brezillon@bootlin.com>
+ *     Frieder Schrempf <frieder.schrempf@kontron.de>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/sizes.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/*
+ * The driver only uses one single LUT entry, that is updated on
+ * each call of exec_op(). Index 0 is preset at boot with a basic
+ * read operation, so let's use the last entry (31).
+ */
+#define	SEQID_LUT			31
+
+/* Registers used by the driver */
+#define FSPI_MCR0			0x00
+#define FSPI_MCR0_AHB_TIMEOUT(x)	((x) << 24)
+#define FSPI_MCR0_IP_TIMEOUT(x)		((x) << 16)
+#define FSPI_MCR0_LEARN_EN		BIT(15)
+#define FSPI_MCR0_SCRFRUN_EN		BIT(14)
+#define FSPI_MCR0_OCTCOMB_EN		BIT(13)
+#define FSPI_MCR0_DOZE_EN		BIT(12)
+#define FSPI_MCR0_HSEN			BIT(11)
+#define FSPI_MCR0_SERCLKDIV		BIT(8)
+#define FSPI_MCR0_ATDF_EN		BIT(7)
+#define FSPI_MCR0_ARDF_EN		BIT(6)
+#define FSPI_MCR0_RXCLKSRC(x)		((x) << 4)
+#define FSPI_MCR0_END_CFG(x)		((x) << 2)
+#define FSPI_MCR0_MDIS			BIT(1)
+#define FSPI_MCR0_SWRST			BIT(0)
+
+#define FSPI_MCR1			0x04
+#define FSPI_MCR1_SEQ_TIMEOUT(x)	((x) << 16)
+#define FSPI_MCR1_AHB_TIMEOUT(x)	(x)
+
+#define FSPI_MCR2			0x08
+#define FSPI_MCR2_IDLE_WAIT(x)		((x) << 24)
+#define FSPI_MCR2_SAMEDEVICEEN		BIT(15)
+#define FSPI_MCR2_CLRLRPHS		BIT(14)
+#define FSPI_MCR2_ABRDATSZ		BIT(8)
+#define FSPI_MCR2_ABRLEARN		BIT(7)
+#define FSPI_MCR2_ABR_READ		BIT(6)
+#define FSPI_MCR2_ABRWRITE		BIT(5)
+#define FSPI_MCR2_ABRDUMMY		BIT(4)
+#define FSPI_MCR2_ABR_MODE		BIT(3)
+#define FSPI_MCR2_ABRCADDR		BIT(2)
+#define FSPI_MCR2_ABRRADDR		BIT(1)
+#define FSPI_MCR2_ABR_CMD		BIT(0)
+
+#define FSPI_AHBCR			0x0c
+#define FSPI_AHBCR_RDADDROPT		BIT(6)
+#define FSPI_AHBCR_PREF_EN		BIT(5)
+#define FSPI_AHBCR_BUFF_EN		BIT(4)
+#define FSPI_AHBCR_CACH_EN		BIT(3)
+#define FSPI_AHBCR_CLRTXBUF		BIT(2)
+#define FSPI_AHBCR_CLRRXBUF		BIT(1)
+#define FSPI_AHBCR_PAR_EN		BIT(0)
+
+#define FSPI_INTEN			0x10
+#define FSPI_INTEN_SCLKSBWR		BIT(9)
+#define FSPI_INTEN_SCLKSBRD		BIT(8)
+#define FSPI_INTEN_DATALRNFL		BIT(7)
+#define FSPI_INTEN_IPTXWE		BIT(6)
+#define FSPI_INTEN_IPRXWA		BIT(5)
+#define FSPI_INTEN_AHBCMDERR		BIT(4)
+#define FSPI_INTEN_IPCMDERR		BIT(3)
+#define FSPI_INTEN_AHBCMDGE		BIT(2)
+#define FSPI_INTEN_IPCMDGE		BIT(1)
+#define FSPI_INTEN_IPCMDDONE		BIT(0)
+
+#define FSPI_INTR			0x14
+#define FSPI_INTR_SCLKSBWR		BIT(9)
+#define FSPI_INTR_SCLKSBRD		BIT(8)
+#define FSPI_INTR_DATALRNFL		BIT(7)
+#define FSPI_INTR_IPTXWE		BIT(6)
+#define FSPI_INTR_IPRXWA		BIT(5)
+#define FSPI_INTR_AHBCMDERR		BIT(4)
+#define FSPI_INTR_IPCMDERR		BIT(3)
+#define FSPI_INTR_AHBCMDGE		BIT(2)
+#define FSPI_INTR_IPCMDGE		BIT(1)
+#define FSPI_INTR_IPCMDDONE		BIT(0)
+
+#define FSPI_LUTKEY			0x18
+#define FSPI_LUTKEY_VALUE		0x5AF05AF0
+
+#define FSPI_LCKCR			0x1C
+
+#define FSPI_LCKER_LOCK			0x1
+#define FSPI_LCKER_UNLOCK		0x2
+
+#define FSPI_BUFXCR_INVALID_MSTRID	0xE
+#define FSPI_AHBRX_BUF0CR0		0x20
+#define FSPI_AHBRX_BUF1CR0		0x24
+#define FSPI_AHBRX_BUF2CR0		0x28
+#define FSPI_AHBRX_BUF3CR0		0x2C
+#define FSPI_AHBRX_BUF4CR0		0x30
+#define FSPI_AHBRX_BUF5CR0		0x34
+#define FSPI_AHBRX_BUF6CR0		0x38
+#define FSPI_AHBRX_BUF7CR0		0x3C
+#define FSPI_AHBRXBUF0CR7_PREF		BIT(31)
+
+#define FSPI_AHBRX_BUF0CR1		0x40
+#define FSPI_AHBRX_BUF1CR1		0x44
+#define FSPI_AHBRX_BUF2CR1		0x48
+#define FSPI_AHBRX_BUF3CR1		0x4C
+#define FSPI_AHBRX_BUF4CR1		0x50
+#define FSPI_AHBRX_BUF5CR1		0x54
+#define FSPI_AHBRX_BUF6CR1		0x58
+#define FSPI_AHBRX_BUF7CR1		0x5C
+
+#define FSPI_FLSHA1CR0			0x60
+#define FSPI_FLSHA2CR0			0x64
+#define FSPI_FLSHB1CR0			0x68
+#define FSPI_FLSHB2CR0			0x6C
+#define FSPI_FLSHXCR0_SZ_KB		10
+#define FSPI_FLSHXCR0_SZ(x)		((x) >> FSPI_FLSHXCR0_SZ_KB)
+
+#define FSPI_FLSHA1CR1			0x70
+#define FSPI_FLSHA2CR1			0x74
+#define FSPI_FLSHB1CR1			0x78
+#define FSPI_FLSHB2CR1			0x7C
+#define FSPI_FLSHXCR1_CSINTR(x)		((x) << 16)
+#define FSPI_FLSHXCR1_CAS(x)		((x) << 11)
+#define FSPI_FLSHXCR1_WA		BIT(10)
+#define FSPI_FLSHXCR1_TCSH(x)		((x) << 5)
+#define FSPI_FLSHXCR1_TCSS(x)		(x)
+
+#define FSPI_FLSHA1CR2			0x80
+#define FSPI_FLSHA2CR2			0x84
+#define FSPI_FLSHB1CR2			0x88
+#define FSPI_FLSHB2CR2			0x8C
+#define FSPI_FLSHXCR2_CLRINSP		BIT(24)
+#define FSPI_FLSHXCR2_AWRWAIT		BIT(16)
+#define FSPI_FLSHXCR2_AWRSEQN_SHIFT	13
+#define FSPI_FLSHXCR2_AWRSEQI_SHIFT	8
+#define FSPI_FLSHXCR2_ARDSEQN_SHIFT	5
+#define FSPI_FLSHXCR2_ARDSEQI_SHIFT	0
+
+#define FSPI_IPCR0			0xA0
+
+#define FSPI_IPCR1			0xA4
+#define FSPI_IPCR1_IPAREN		BIT(31)
+#define FSPI_IPCR1_SEQNUM_SHIFT		24
+#define FSPI_IPCR1_SEQID_SHIFT		16
+#define FSPI_IPCR1_IDATSZ(x)		(x)
+
+#define FSPI_IPCMD			0xB0
+#define FSPI_IPCMD_TRG			BIT(0)
+
+#define FSPI_DLPR			0xB4
+
+#define FSPI_IPRXFCR			0xB8
+#define FSPI_IPRXFCR_CLR		BIT(0)
+#define FSPI_IPRXFCR_DMA_EN		BIT(1)
+#define FSPI_IPRXFCR_WMRK(x)		((x) << 2)
+
+#define FSPI_IPTXFCR			0xBC
+#define FSPI_IPTXFCR_CLR		BIT(0)
+#define FSPI_IPTXFCR_DMA_EN		BIT(1)
+#define FSPI_IPTXFCR_WMRK(x)		((x) << 2)
+
+#define FSPI_DLLACR			0xC0
+#define FSPI_DLLACR_OVRDEN		BIT(8)
+
+#define FSPI_DLLBCR			0xC4
+#define FSPI_DLLBCR_OVRDEN		BIT(8)
+
+#define FSPI_STS0			0xE0
+#define FSPI_STS0_DLPHB(x)		((x) << 8)
+#define FSPI_STS0_DLPHA(x)		((x) << 4)
+#define FSPI_STS0_CMD_SRC(x)		((x) << 2)
+#define FSPI_STS0_ARB_IDLE		BIT(1)
+#define FSPI_STS0_SEQ_IDLE		BIT(0)
+
+#define FSPI_STS1			0xE4
+#define FSPI_STS1_IP_ERRCD(x)		((x) << 24)
+#define FSPI_STS1_IP_ERRID(x)		((x) << 16)
+#define FSPI_STS1_AHB_ERRCD(x)		((x) << 8)
+#define FSPI_STS1_AHB_ERRID(x)		(x)
+
+#define FSPI_AHBSPNST			0xEC
+#define FSPI_AHBSPNST_DATLFT(x)		((x) << 16)
+#define FSPI_AHBSPNST_BUFID(x)		((x) << 1)
+#define FSPI_AHBSPNST_ACTIVE		BIT(0)
+
+#define FSPI_IPRXFSTS			0xF0
+#define FSPI_IPRXFSTS_RDCNTR(x)		((x) << 16)
+#define FSPI_IPRXFSTS_FILL(x)		(x)
+
+#define FSPI_IPTXFSTS			0xF4
+#define FSPI_IPTXFSTS_WRCNTR(x)		((x) << 16)
+#define FSPI_IPTXFSTS_FILL(x)		(x)
+
+#define FSPI_RFDR			0x100
+#define FSPI_TFDR			0x180
+
+#define FSPI_LUT_BASE			0x200
+#define FSPI_LUT_OFFSET			(SEQID_LUT * 4 * 4)
+#define FSPI_LUT_REG(idx) \
+	(FSPI_LUT_BASE + FSPI_LUT_OFFSET + (idx) * 4)
+
+/* register map end */
+
+/* Instruction set for the LUT register. */
+#define LUT_STOP			0x00
+#define LUT_CMD				0x01
+#define LUT_ADDR			0x02
+#define LUT_CADDR_SDR			0x03
+#define LUT_MODE			0x04
+#define LUT_MODE2			0x05
+#define LUT_MODE4			0x06
+#define LUT_MODE8			0x07
+#define LUT_NXP_WRITE			0x08
+#define LUT_NXP_READ			0x09
+#define LUT_LEARN_SDR			0x0A
+#define LUT_DATSZ_SDR			0x0B
+#define LUT_DUMMY			0x0C
+#define LUT_DUMMY_RWDS_SDR		0x0D
+#define LUT_JMP_ON_CS			0x1F
+#define LUT_CMD_DDR			0x21
+#define LUT_ADDR_DDR			0x22
+#define LUT_CADDR_DDR			0x23
+#define LUT_MODE_DDR			0x24
+#define LUT_MODE2_DDR			0x25
+#define LUT_MODE4_DDR			0x26
+#define LUT_MODE8_DDR			0x27
+#define LUT_WRITE_DDR			0x28
+#define LUT_READ_DDR			0x29
+#define LUT_LEARN_DDR			0x2A
+#define LUT_DATSZ_DDR			0x2B
+#define LUT_DUMMY_DDR			0x2C
+#define LUT_DUMMY_RWDS_DDR		0x2D
+
+/*
+ * Calculate number of required PAD bits for LUT register.
+ *
+ * The pad stands for the number of IO lines [0:7].
+ * For example, the octal read needs eight IO lines,
+ * so you should use LUT_PAD(8). This macro
+ * returns 3 i.e. use eight (2^3) IP lines for read.
+ */
+#define LUT_PAD(x) (fls(x) - 1)
+
+/*
+ * Macro for constructing the LUT entries with the following
+ * register layout:
+ *
+ *  ---------------------------------------------------
+ *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
+ *  ---------------------------------------------------
+ */
+#define PAD_SHIFT		8
+#define INSTR_SHIFT		10
+#define OPRND_SHIFT		16
+
+/* Macros for constructing the LUT register. */
+#define LUT_DEF(idx, ins, pad, opr)			  \
+	((((ins) << INSTR_SHIFT) | ((pad) << PAD_SHIFT) | \
+	(opr)) << (((idx) % 2) * OPRND_SHIFT))
+
+/* Operands for the LUT register. */
+#define ADDR8BIT		0x08
+#define ADDR16BIT		0x10
+#define ADDR24BIT		0x18
+#define ADDR32BIT		0x20
+
+#define POLL_TOUT		5000
+#define NXP_FSPI_MAX_CHIPSELECT		4
+
+struct nxp_fspi_devtype_data {
+	unsigned int rxfifo;
+	unsigned int txfifo;
+	unsigned int ahb_buf_size;
+	unsigned int quirks;
+	bool little_endian;
+};
+
+static const struct nxp_fspi_devtype_data lx2160a_data = {
+	.rxfifo = SZ_512,       /* (64  * 64 bits)  */
+	.txfifo = SZ_1K,        /* (128 * 64 bits)  */
+	.ahb_buf_size = SZ_2K,  /* (256 * 64 bits)  */
+	.quirks = 0,
+	.little_endian = true,  /* little-endian    */
+};
+
+struct nxp_fspi {
+	void __iomem *iobase;
+	void __iomem *ahb_addr;
+	u32 memmap_phy;
+	u32 memmap_phy_size;
+	struct clk *clk, *clk_en;
+	struct device *dev;
+	struct completion c;
+	const struct nxp_fspi_devtype_data *devtype_data;
+	struct mutex lock;
+	struct pm_qos_request pm_qos_req;
+	int selected;
+};
+
+/*
+ * R/W functions for big- or little-endian registers:
+ * The FSPI controller's endianness is independent of
+ * the CPU core's endianness. So far, although the CPU
+ * core is little-endian the FSPI controller can use
+ * big-endian or little-endian.
+ */
+static void fspi_writel(struct nxp_fspi *f, u32 val, void __iomem *addr)
+{
+	if (f->devtype_data->little_endian)
+		iowrite32(val, addr);
+	else
+		iowrite32be(val, addr);
+}
+
+static u32 fspi_readl(struct nxp_fspi *f, void __iomem *addr)
+{
+	if (f->devtype_data->little_endian)
+		return ioread32(addr);
+	else
+		return ioread32be(addr);
+}
+
+static irqreturn_t nxp_fspi_irq_handler(int irq, void *dev_id)
+{
+	struct nxp_fspi *f = dev_id;
+	u32 reg;
+
+	/* clear interrupt */
+	reg = fspi_readl(f, f->iobase + FSPI_INTR);
+	fspi_writel(f, FSPI_INTR_IPCMDDONE, f->iobase + FSPI_INTR);
+
+	if (reg & FSPI_INTR_IPCMDDONE)
+		complete(&f->c);
+
+	return IRQ_HANDLED;
+}
+
+static int nxp_fspi_check_buswidth(struct nxp_fspi *f, u8 width)
+{
+	switch (width) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		return 0;
+	}
+
+	return -ENOTSUPP;
+}
+
+static bool nxp_fspi_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	ret = nxp_fspi_check_buswidth(f, op->cmd.buswidth);
+
+	if (op->addr.nbytes)
+		ret |= nxp_fspi_check_buswidth(f, op->addr.buswidth);
+
+	if (op->dummy.nbytes)
+		ret |= nxp_fspi_check_buswidth(f, op->dummy.buswidth);
+
+	if (op->data.nbytes)
+		ret |= nxp_fspi_check_buswidth(f, op->data.buswidth);
+
+	if (ret)
+		return false;
+
+	/*
+	 * The number of instructions needed for the op, needs
+	 * to fit into a single LUT entry.
+	 */
+	if (op->addr.nbytes +
+	   (op->dummy.nbytes ? 1:0) +
+	   (op->data.nbytes ? 1:0) > 6)
+		return false;
+
+	/* Max 64 dummy clock cycles supported */
+	if (op->dummy.buswidth &&
+	    (op->dummy.nbytes * 8 / op->dummy.buswidth > 64))
+		return false;
+
+	/* Max data length, check controller limits and alignment */
+	if (op->data.dir == SPI_MEM_DATA_IN &&
+	    (op->data.nbytes > f->devtype_data->ahb_buf_size ||
+	     (op->data.nbytes > f->devtype_data->rxfifo - 4 &&
+	      !IS_ALIGNED(op->data.nbytes, 8))))
+		return false;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT &&
+	    op->data.nbytes > f->devtype_data->txfifo)
+		return false;
+
+	return true;
+}
+
+/* Instead of busy looping invoke readl_poll_timeout functionality. */
+static int fspi_readl_poll_tout(struct nxp_fspi *f, void __iomem *base,
+				u32 mask, u32 delay_us,
+				u32 timeout_us, bool condition)
+{
+	u32 reg;
+
+	if (!f->devtype_data->little_endian)
+		mask = (u32)cpu_to_be32(mask);
+
+	if (condition)
+		return readl_poll_timeout(base, reg, (reg & mask),
+					  delay_us, timeout_us);
+	else
+		return readl_poll_timeout(base, reg, !(reg & mask),
+					  delay_us, timeout_us);
+}
+
+/*
+ * If the slave device content being changed by Write/Erase, need to
+ * invalidate the AHB buffer. This can be achieved by doing the reset
+ * of controller after setting MCR0[SWRESET] bit.
+ */
+static inline void nxp_fspi_invalid(struct nxp_fspi *f)
+{
+	u32 reg;
+	int ret;
+
+	reg = fspi_readl(f, f->iobase + FSPI_MCR0);
+	fspi_writel(f, reg | FSPI_MCR0_SWRST, f->iobase + FSPI_MCR0);
+
+	/* w1c register, wait unit clear */
+	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
+				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
+	WARN_ON(ret);
+}
+
+static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	u32 lutval[4] = {};
+	int lutidx = 1, i;
+
+	/* cmd */
+	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
+			     op->cmd.opcode);
+
+	/* addr bus width */
+	if (op->addr.nbytes) {
+		u32 addrlen = 0;
+
+		switch (op->addr.nbytes) {
+		case 1:
+			addrlen = ADDR8BIT;
+			break;
+		case 2:
+			addrlen = ADDR16BIT;
+			break;
+		case 3:
+			addrlen = ADDR24BIT;
+			break;
+		case 4:
+			addrlen = ADDR32BIT;
+			break;
+		default:
+			dev_err(f->dev, "In-correct address length\n");
+			return;
+		}
+
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
+					      LUT_PAD(op->addr.buswidth),
+					      addrlen);
+		lutidx++;
+	}
+
+	/* dummy bytes, if needed */
+	if (op->dummy.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
+		/*
+		 * Due to FlexSPI controller limitation number of PAD for dummy
+		 * buswidth needs to be programmed as equal to data buswidth.
+		 */
+					      LUT_PAD(op->data.buswidth),
+					      op->dummy.nbytes * 8 /
+					      op->dummy.buswidth);
+		lutidx++;
+	}
+
+	/* read/write data bytes */
+	if (op->data.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx,
+					      op->data.dir == SPI_MEM_DATA_IN ?
+					      LUT_NXP_READ : LUT_NXP_WRITE,
+					      LUT_PAD(op->data.buswidth),
+					      0);
+		lutidx++;
+	}
+
+	/* stop condition. */
+	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
+
+	/* unlock LUT */
+	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
+	fspi_writel(f, FSPI_LCKER_UNLOCK, f->iobase + FSPI_LCKCR);
+
+	/* fill LUT */
+	for (i = 0; i < ARRAY_SIZE(lutval); i++)
+		fspi_writel(f, lutval[i], base + FSPI_LUT_REG(i));
+
+	dev_dbg(f->dev, "CMD[%x] lutval[0:%x \t 1:%x \t 2:%x \t 3:%x]\n",
+		op->cmd.opcode, lutval[0], lutval[1], lutval[2], lutval[3]);
+
+	/* lock LUT */
+	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
+	fspi_writel(f, FSPI_LCKER_LOCK, f->iobase + FSPI_LCKCR);
+}
+
+static int nxp_fspi_clk_prep_enable(struct nxp_fspi *f)
+{
+	int ret;
+
+	ret = clk_prepare_enable(f->clk_en);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(f->clk);
+	if (ret) {
+		clk_disable_unprepare(f->clk_en);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
+{
+	clk_disable_unprepare(f->clk);
+	clk_disable_unprepare(f->clk_en);
+}
+
+/*
+ * In FlexSPI controller, flash access is based on value of FSPI_FLSHXXCR0
+ * register and start base address of the slave device.
+ *
+ *							    (Higher address)
+ *				--------    <-- FLSHB2CR0
+ *				|  B2  |
+ *				|      |
+ *	B2 start address -->	--------    <-- FLSHB1CR0
+ *				|  B1  |
+ *				|      |
+ *	B1 start address -->	--------    <-- FLSHA2CR0
+ *				|  A2  |
+ *				|      |
+ *	A2 start address -->	--------    <-- FLSHA1CR0
+ *				|  A1  |
+ *				|      |
+ *	A1 start address -->	--------		    (Lower address)
+ *
+ *
+ * Start base address defines the starting address range for given CS and
+ * FSPI_FLSHXXCR0 defines the size of the slave device connected at given CS.
+ *
+ * But, different targets are having different combinations of number of CS,
+ * some targets only have single CS or two CS covering controller's full
+ * memory mapped space area.
+ * Thus, implementation is being done as independent of the size and number
+ * of the connected slave device.
+ * Assign controller memory mapped space size as the size to the connected
+ * slave device.
+ * Mark FLSHxxCR0 as zero initially and then assign value only to the selected
+ * chip-select Flash configuration register.
+ *
+ * For e.g. to access CS2 (B1), FLSHB1CR0 register would be equal to the
+ * memory mapped size of the controller.
+ * Value for rest of the CS FLSHxxCR0 register would be zero.
+ *
+ */
+static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi)
+{
+	unsigned long rate = spi->max_speed_hz;
+	int ret;
+	uint64_t size_kb;
+
+	/*
+	 * Return, if previously selected slave device is same as current
+	 * requested slave device.
+	 */
+	if (f->selected == spi->chip_select)
+		return;
+
+	/* Reset FLSHxxCR0 registers */
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHA1CR0);
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHA2CR0);
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHB1CR0);
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHB2CR0);
+
+	/* Assign controller memory mapped space as size, KBytes, of flash. */
+	size_kb = FSPI_FLSHXCR0_SZ(f->memmap_phy_size);
+
+	switch (spi->chip_select) {
+	case 0:
+		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHA1CR0);
+		break;
+	case 1:
+		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHA2CR0);
+		break;
+	case 2:
+		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHB1CR0);
+		break;
+	case 3:
+		fspi_writel(f, size_kb, f->iobase + FSPI_FLSHB2CR0);
+		break;
+	default:
+		dev_err(f->dev, "In-correct CS provided\n");
+		return;
+	}
+
+	dev_dbg(f->dev, "Slave device [CS:%x] selected\n", spi->chip_select);
+
+	nxp_fspi_clk_disable_unprep(f);
+
+	ret = clk_set_rate(f->clk, rate);
+	if (ret)
+		return;
+
+	ret = nxp_fspi_clk_prep_enable(f);
+	if (ret)
+		return;
+	f->selected = spi->chip_select;
+}
+
+static void nxp_fspi_read_ahb(struct nxp_fspi *f, const struct spi_mem_op *op)
+{
+	u32 len = op->data.nbytes;
+
+	/* Read out the data directly from the AHB buffer. */
+	memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len);
+}
+
+static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	int i, j, ret;
+	int size, tmp_size, wm_size;
+	u32 data = 0;
+	u32 *txbuf = (u32 *) op->data.buf.out;
+
+	/* clear the TX FIFO. */
+	fspi_writel(f, FSPI_IPTXFCR_CLR, base + FSPI_IPTXFCR);
+
+	/* Default value of water mark level is 8 bytes. */
+	wm_size = 8;
+	size = op->data.nbytes / wm_size;
+	for (i = 0; i < size; i++) {
+		/* Wait for TXFIFO empty */
+		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+					   FSPI_INTR_IPTXWE, 0,
+					   POLL_TOUT, true);
+		WARN_ON(ret);
+
+		j = 0;
+		tmp_size = wm_size;
+		while (tmp_size > 0) {
+			data = 0;
+			memcpy(&data, txbuf, 4);
+			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
+			tmp_size -= 4;
+			j++;
+			txbuf += 1;
+		}
+		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
+	}
+
+	size = op->data.nbytes % wm_size;
+	if (size) {
+		/* Wait for TXFIFO empty */
+		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+					   FSPI_INTR_IPTXWE, 0,
+					   POLL_TOUT, true);
+		WARN_ON(ret);
+
+		j = 0;
+		tmp_size = 0;
+		while (size > 0) {
+			data = 0;
+			tmp_size = (size < 4) ? size : 4;
+			memcpy(&data, txbuf, tmp_size);
+			fspi_writel(f, data, base + FSPI_TFDR + j * 4);
+			size -= tmp_size;
+			j++;
+			txbuf += 1;
+		}
+		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
+	}
+}
+
+static void nxp_fspi_read_rxfifo(struct nxp_fspi *f,
+			  const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	int i, j;
+	int size, tmp_size, wm_size, ret;
+	u32 tmp = 0;
+	u8 *buf = op->data.buf.in;
+	u32 len = op->data.nbytes;
+
+	/* Default value of water mark level is 8 bytes. */
+	wm_size = 8;
+
+	while (len > 0) {
+		size = len / wm_size;
+
+		for (i = 0; i < size; i++) {
+			/* Wait for RXFIFO available */
+			ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+						   FSPI_INTR_IPRXWA, 0,
+						   POLL_TOUT, true);
+			WARN_ON(ret);
+
+			j = 0;
+			tmp_size = wm_size;
+			while (tmp_size > 0) {
+				tmp = 0;
+				tmp = fspi_readl(f, base + FSPI_RFDR + j * 4);
+				memcpy(buf, &tmp, 4);
+				tmp_size -= 4;
+				j++;
+				buf += 4;
+			}
+			/* move the FIFO pointer */
+			fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
+			len -= wm_size;
+		}
+
+		size = len % wm_size;
+
+		j = 0;
+		if (size) {
+			/* Wait for RXFIFO available */
+			ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+						   FSPI_INTR_IPRXWA, 0,
+						   POLL_TOUT, true);
+			WARN_ON(ret);
+
+			while (len > 0) {
+				tmp = 0;
+				size = (len < 4) ? len : 4;
+				tmp = fspi_readl(f, base + FSPI_RFDR + j * 4);
+				memcpy(buf, &tmp, size);
+				len -= size;
+				j++;
+				buf += size;
+			}
+		}
+
+		/* invalid the RXFIFO */
+		fspi_writel(f, FSPI_IPRXFCR_CLR, base + FSPI_IPRXFCR);
+		/* move the FIFO pointer */
+		fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
+	}
+}
+
+static int nxp_fspi_do_op(struct nxp_fspi *f, const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	int seqnum = 0;
+	int err = 0;
+	u32 reg;
+
+	reg = fspi_readl(f, base + FSPI_IPRXFCR);
+	/* invalid RXFIFO first */
+	reg &= ~FSPI_IPRXFCR_DMA_EN;
+	reg = reg | FSPI_IPRXFCR_CLR;
+	fspi_writel(f, reg, base + FSPI_IPRXFCR);
+
+	init_completion(&f->c);
+
+	fspi_writel(f, op->addr.val, base + FSPI_IPCR0);
+	/*
+	 * Always start the sequence at the same index since we update
+	 * the LUT at each exec_op() call. And also specify the DATA
+	 * length, since it's has not been specified in the LUT.
+	 */
+	fspi_writel(f, op->data.nbytes |
+		 (SEQID_LUT << FSPI_IPCR1_SEQID_SHIFT) |
+		 (seqnum << FSPI_IPCR1_SEQNUM_SHIFT),
+		 base + FSPI_IPCR1);
+
+	/* Trigger the LUT now. */
+	fspi_writel(f, FSPI_IPCMD_TRG, base + FSPI_IPCMD);
+
+	/* Wait for the interrupt. */
+	if (!wait_for_completion_timeout(&f->c, msecs_to_jiffies(1000)))
+		err = -ETIMEDOUT;
+
+	/* Invoke IP data read, if request is of data read. */
+	if (!err && op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
+		nxp_fspi_read_rxfifo(f, op);
+
+	return err;
+}
+
+static int nxp_fspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
+	int err = 0;
+
+	mutex_lock(&f->lock);
+
+	/* Wait for controller being ready. */
+	err = fspi_readl_poll_tout(f, f->iobase + FSPI_STS0,
+				   FSPI_STS0_ARB_IDLE, 1, POLL_TOUT, true);
+	WARN_ON(err);
+
+	nxp_fspi_select_mem(f, mem->spi);
+
+	nxp_fspi_prepare_lut(f, op);
+	/*
+	 * If we have large chunks of data, we read them through the AHB bus
+	 * by accessing the mapped memory. In all other cases we use
+	 * IP commands to access the flash.
+	 */
+	if (op->data.nbytes > (f->devtype_data->rxfifo - 4) &&
+	    op->data.dir == SPI_MEM_DATA_IN) {
+		nxp_fspi_read_ahb(f, op);
+	} else {
+		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+			nxp_fspi_fill_txfifo(f, op);
+
+		err = nxp_fspi_do_op(f, op);
+
+		/* Invalidate the data in the AHB buffer. */
+		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+			nxp_fspi_invalid(f);
+	}
+
+	mutex_unlock(&f->lock);
+
+	return err;
+}
+
+static int nxp_fspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		if (op->data.nbytes > f->devtype_data->txfifo)
+			op->data.nbytes = f->devtype_data->txfifo;
+	} else {
+		if (op->data.nbytes > f->devtype_data->ahb_buf_size)
+			op->data.nbytes = f->devtype_data->ahb_buf_size;
+		else if (op->data.nbytes > (f->devtype_data->rxfifo - 4))
+			op->data.nbytes = ALIGN_DOWN(op->data.nbytes, 8);
+	}
+
+	return 0;
+}
+
+static int nxp_fspi_default_setup(struct nxp_fspi *f)
+{
+	void __iomem *base = f->iobase;
+	int ret, i;
+	u32 reg;
+
+	/* disable and unprepare clock to avoid glitch pass to controller */
+	nxp_fspi_clk_disable_unprep(f);
+
+	/* the default frequency, we will change it later if necessary. */
+	ret = clk_set_rate(f->clk, 20000000);
+	if (ret)
+		return ret;
+
+	ret = nxp_fspi_clk_prep_enable(f);
+	if (ret)
+		return ret;
+
+	/* Reset the module */
+	/* w1c register, wait unit clear */
+	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
+				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
+	WARN_ON(ret);
+
+	/* Disable the module */
+	fspi_writel(f, FSPI_MCR0_MDIS, base + FSPI_MCR0);
+
+	/* Reset the DLL register to default value */
+	fspi_writel(f, FSPI_DLLACR_OVRDEN, base + FSPI_DLLACR);
+	fspi_writel(f, FSPI_DLLBCR_OVRDEN, base + FSPI_DLLBCR);
+
+	/* enable module */
+	fspi_writel(f, FSPI_MCR0_AHB_TIMEOUT(0xFF) | FSPI_MCR0_IP_TIMEOUT(0xFF),
+		 base + FSPI_MCR0);
+
+	/*
+	 * Disable same device enable bit and configure all slave devices
+	 * independently.
+	 */
+	reg = fspi_readl(f, f->iobase + FSPI_MCR2);
+	reg = reg & ~(FSPI_MCR2_SAMEDEVICEEN);
+	fspi_writel(f, reg, base + FSPI_MCR2);
+
+	/* AHB configuration for access buffer 0~7. */
+	for (i = 0; i < 7; i++)
+		fspi_writel(f, 0, base + FSPI_AHBRX_BUF0CR0 + 4 * i);
+
+	/*
+	 * Set ADATSZ with the maximum AHB buffer size to improve the read
+	 * performance.
+	 */
+	fspi_writel(f, (f->devtype_data->ahb_buf_size / 8 |
+		  FSPI_AHBRXBUF0CR7_PREF), base + FSPI_AHBRX_BUF7CR0);
+
+	/* prefetch and no start address alignment limitation */
+	fspi_writel(f, FSPI_AHBCR_PREF_EN | FSPI_AHBCR_RDADDROPT,
+		 base + FSPI_AHBCR);
+
+	/* AHB Read - Set lut sequence ID for all CS. */
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA1CR2);
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA2CR2);
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB1CR2);
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB2CR2);
+
+	f->selected = -1;
+
+	/* enable the interrupt */
+	fspi_writel(f, FSPI_INTEN_IPCMDDONE, base + FSPI_INTEN);
+
+	return 0;
+}
+
+static const struct spi_controller_mem_ops nxp_fspi_mem_ops = {
+	.adjust_op_size = nxp_fspi_adjust_op_size,
+	.supports_op = nxp_fspi_supports_op,
+	.exec_op = nxp_fspi_exec_op,
+};
+
+static int nxp_fspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	struct nxp_fspi *f;
+	int ret;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*f));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD |
+			  SPI_TX_DUAL | SPI_TX_QUAD;
+
+	f = spi_controller_get_devdata(ctlr);
+	f->dev = dev;
+	f->devtype_data = of_device_get_match_data(dev);
+	if (!f->devtype_data) {
+		ret = -ENODEV;
+		goto err_put_ctrl;
+	}
+
+	platform_set_drvdata(pdev, f);
+
+	/* find the resources - configuration register address space */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_base");
+	f->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(f->iobase)) {
+		ret = PTR_ERR(f->iobase);
+		goto err_put_ctrl;
+	}
+
+	/* find the resources - controller memory mapped space */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_mmap");
+	f->ahb_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(f->ahb_addr)) {
+		ret = PTR_ERR(f->ahb_addr);
+		goto err_put_ctrl;
+	}
+
+	/* assign memory mapped starting address and mapped size. */
+	f->memmap_phy = res->start;
+	f->memmap_phy_size = resource_size(res);
+
+	/* find the clocks */
+	f->clk_en = devm_clk_get(dev, "fspi_en");
+	if (IS_ERR(f->clk_en)) {
+		ret = PTR_ERR(f->clk_en);
+		goto err_put_ctrl;
+	}
+
+	f->clk = devm_clk_get(dev, "fspi");
+	if (IS_ERR(f->clk)) {
+		ret = PTR_ERR(f->clk);
+		goto err_put_ctrl;
+	}
+
+	ret = nxp_fspi_clk_prep_enable(f);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		goto err_put_ctrl;
+	}
+
+	/* find the irq */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "failed to get the irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	ret = devm_request_irq(dev, ret,
+			nxp_fspi_irq_handler, 0, pdev->name, f);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	mutex_init(&f->lock);
+
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = NXP_FSPI_MAX_CHIPSELECT;
+	ctlr->mem_ops = &nxp_fspi_mem_ops;
+
+	nxp_fspi_default_setup(f);
+
+	ctlr->dev.of_node = np;
+
+	ret = spi_register_controller(ctlr);
+	if (ret)
+		goto err_destroy_mutex;
+
+	return 0;
+
+err_destroy_mutex:
+	mutex_destroy(&f->lock);
+
+err_disable_clk:
+	nxp_fspi_clk_disable_unprep(f);
+
+err_put_ctrl:
+	spi_controller_put(ctlr);
+
+	dev_err(dev, "NXP FSPI probe failed\n");
+	return ret;
+}
+
+static int nxp_fspi_remove(struct platform_device *pdev)
+{
+	struct nxp_fspi *f = platform_get_drvdata(pdev);
+
+	/* disable the hardware */
+	fspi_writel(f, FSPI_MCR0_MDIS, f->iobase + FSPI_MCR0);
+
+	nxp_fspi_clk_disable_unprep(f);
+
+	mutex_destroy(&f->lock);
+
+	return 0;
+}
+
+static int nxp_fspi_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int nxp_fspi_resume(struct device *dev)
+{
+	struct nxp_fspi *f = dev_get_drvdata(dev);
+
+	nxp_fspi_default_setup(f);
+
+	return 0;
+}
+
+static const struct of_device_id nxp_fspi_dt_ids[] = {
+	{ .compatible = "nxp,lx2160a-fspi", .data = (void *)&lx2160a_data, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, nxp_fspi_dt_ids);
+
+static const struct dev_pm_ops nxp_fspi_pm_ops = {
+	.suspend	= nxp_fspi_suspend,
+	.resume		= nxp_fspi_resume,
+};
+
+static struct platform_driver nxp_fspi_driver = {
+	.driver = {
+		.name	= "nxp-fspi",
+		.of_match_table = nxp_fspi_dt_ids,
+		.pm =   &nxp_fspi_pm_ops,
+	},
+	.probe          = nxp_fspi_probe,
+	.remove		= nxp_fspi_remove,
+};
+module_platform_driver(nxp_fspi_driver);
+
+MODULE_DESCRIPTION("NXP FSPI Controller Driver");
+MODULE_AUTHOR("NXP Semiconductor");
+MODULE_LICENSE("GPL v2");