[03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

Message ID 1527686082-15142-4-git-send-email-frieder.schrempf@exceet.de
State New
Delegated to: Boris Brezillon
Headers show
Series
  • Port the FSL QSPI driver to the SPI framework
Related show

Commit Message

Frieder Schrempf May 30, 2018, 1:14 p.m.
This driver is derived from the SPI NOR driver at
mtd/spi-nor/fsl-quadspi.c. It uses the new SPI memory interface
of the SPI framework to issue flash memory operations to up to
four connected flash chips (2 buses with 2 CS each).

The controller does not support generic SPI messages.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
---
 drivers/spi/Kconfig        |  11 +
 drivers/spi/Makefile       |   1 +
 drivers/spi/spi-fsl-qspi.c | 929 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 941 insertions(+)

Comments

Yogesh Narayan Gaur May 30, 2018, 1:50 p.m. | #1
Hi Frieder,

Thanks for migrating the fsl-quadspi.c driver to the new SPI framework.
This patch uses a dynamic LUT approach, creating the LUT at run time instead of the fixed static LUT used by the current driver at mtd/spi-nor/fsl-quadspi.c.
I have submitted the dynamic LUT changes for mtd/spi-nor/fsl-quadspi.c, and v10 of that series is under review.

Please add the Signed-off-by tags from those patches to this patch; the patchwork link is https://patchwork.ozlabs.org/patch/896534/

Thanks
Yogesh Gaur

-----Original Message-----
From: Frieder Schrempf [mailto:frieder.schrempf@exceet.de] 
Sent: Wednesday, May 30, 2018 6:45 PM
To: linux-mtd@lists.infradead.org; boris.brezillon@bootlin.com; linux-spi@vger.kernel.org
Cc: dwmw2@infradead.org; computersforpeace@gmail.com; marek.vasut@gmail.com; richard@nod.at; miquel.raynal@bootlin.com; broonie@kernel.org; David Wolfe <david.wolfe@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>; Han Xu <han.xu@nxp.com>; Frieder Schrempf <frieder.schrempf@exceet.de>; linux-kernel@vger.kernel.org
Subject: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

This driver is derived from the SPI NOR driver at mtd/spi-nor/fsl-quadspi.c. It uses the new SPI memory interface of the SPI framework to issue flash memory operations to up to four connected flash chips (2 buses with 2 CS each).

The controller does not support generic SPI messages.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
---
 drivers/spi/Kconfig        |  11 +
 drivers/spi/Makefile       |   1 +
 drivers/spi/spi-fsl-qspi.c | 929 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 941 insertions(+)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index e62ac32..6de0df5 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -251,6 +251,17 @@ config SPI_FSL_LPSPI
 	help
 	  This enables Freescale i.MX LPSPI controllers in master mode.
 
+config SPI_FSL_QSPI
+	tristate "Freescale QSPI controller"
+	depends on ARCH_MXC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
+	depends on HAS_IOMEM
+	help
+	  This enables support for the Quad SPI controller in master mode.
+	  Up to four flash chips can be connected on two buses with two
+	  chipselects each.
+	  This controller does not support generic SPI messages. It only
+	  supports the high-level SPI memory interface.
+
 config SPI_GPIO
 	tristate "GPIO-based bitbanging SPI Master"
 	depends on GPIOLIB || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index cb1f437..a8f7fda 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_SPI_FSL_DSPI)		+= spi-fsl-dspi.o
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
 obj-$(CONFIG_SPI_FSL_ESPI)		+= spi-fsl-espi.o
 obj-$(CONFIG_SPI_FSL_LPSPI)		+= spi-fsl-lpspi.o
+obj-$(CONFIG_SPI_FSL_QSPI)		+= spi-fsl-qspi.o
 obj-$(CONFIG_SPI_FSL_SPI)		+= spi-fsl-spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi-gpio.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c new file mode 100644 index 0000000..c16d070
--- /dev/null
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -0,0 +1,929 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Freescale QuadSPI driver.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ * Copyright (C) 2018 Bootlin
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ *
+ * Transition to SPI MEM interface:
+ * Author:
+ *     Boris Brezillon <boris.brezillon@bootlin.com>
+ *     Frieder Schrempf <frieder.schrempf@exceet.de>
+ *
+ * Based on the original fsl-quadspi.c spi-nor driver:
+ * Author: Freescale Semiconductor, Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/sizes.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/*
+ * The driver only uses one single LUT entry, that is updated on
+ * each call of exec_op(). Index 0 is preset at boot with a basic
+ * read operation, so let's use the last entry (15).
+ */
+#define	SEQID_LUT			15
+
+/* Registers used by the driver */
+#define QUADSPI_MCR			0x00
+#define QUADSPI_MCR_RESERVED_MASK	(0xF << 16)
+#define QUADSPI_MCR_MDIS_MASK		BIT(14)
+#define QUADSPI_MCR_CLR_TXF_MASK	BIT(11)
+#define QUADSPI_MCR_CLR_RXF_MASK	BIT(10)
+#define QUADSPI_MCR_DDR_EN_MASK		BIT(7)
+#define QUADSPI_MCR_END_CFG_MASK	(0x3 << 2)
+#define QUADSPI_MCR_SWRSTHD_MASK	BIT(1)
+#define QUADSPI_MCR_SWRSTSD_MASK	BIT(0)
+
+#define QUADSPI_IPCR			0x08
+#define QUADSPI_IPCR_SEQID_SHIFT	24
+
+#define QUADSPI_BUF3CR			0x1c
+#define QUADSPI_BUF3CR_ALLMST_MASK	BIT(31)
+#define QUADSPI_BUF3CR_ADATSZ_SHIFT	8
+#define QUADSPI_BUF3CR_ADATSZ_MASK	(0xFF << QUADSPI_BUF3CR_ADATSZ_SHIFT)
+
+#define QUADSPI_BFGENCR			0x20
+#define QUADSPI_BFGENCR_SEQID_SHIFT	12
+
+#define QUADSPI_BUF0IND			0x30
+#define QUADSPI_BUF1IND			0x34
+#define QUADSPI_BUF2IND			0x38
+#define QUADSPI_SFAR			0x100
+
+#define QUADSPI_SMPR			0x108
+#define QUADSPI_SMPR_DDRSMP_MASK	(7 << 16)
+#define QUADSPI_SMPR_FSDLY_MASK		BIT(6)
+#define QUADSPI_SMPR_FSPHS_MASK		BIT(5)
+#define QUADSPI_SMPR_HSENA_MASK		BIT(0)
+
+#define QUADSPI_RBCT			0x110
+#define QUADSPI_RBCT_WMRK_MASK		0x1F
+#define QUADSPI_RBCT_RXBRD_USEIPS	BIT(8)
+
+#define QUADSPI_TBDR			0x154
+
+#define QUADSPI_SR			0x15c
+#define QUADSPI_SR_IP_ACC_MASK		BIT(1)
+#define QUADSPI_SR_AHB_ACC_MASK		BIT(2)
+
+#define QUADSPI_FR			0x160
+#define QUADSPI_FR_TFF_MASK		BIT(0)
+
+#define QUADSPI_SPTRCLR			0x16c
+#define QUADSPI_SPTRCLR_IPPTRC		BIT(8)
+#define QUADSPI_SPTRCLR_BFPTRC		BIT(0)
+
+#define QUADSPI_SFA1AD			0x180
+#define QUADSPI_SFA2AD			0x184
+#define QUADSPI_SFB1AD			0x188
+#define QUADSPI_SFB2AD			0x18c
+#define QUADSPI_RBDR(x)			(0x200 + ((x) * 4))
+
+#define QUADSPI_LUTKEY			0x300
+#define QUADSPI_LUTKEY_VALUE		0x5AF05AF0
+
+#define QUADSPI_LCKCR			0x304
+#define QUADSPI_LCKER_LOCK		BIT(0)
+#define QUADSPI_LCKER_UNLOCK		BIT(1)
+
+#define QUADSPI_RSER			0x164
+#define QUADSPI_RSER_TFIE		BIT(0)
+
+#define QUADSPI_LUT_BASE		0x310
+#define QUADSPI_LUT_OFFSET		(SEQID_LUT * 4 * 4)
+#define QUADSPI_LUT_REG(idx)		(QUADSPI_LUT_BASE + \
+					QUADSPI_LUT_OFFSET + (idx) * 4)
+
+/* Instruction set for the LUT register */
+#define LUT_STOP		0
+#define LUT_CMD			1
+#define LUT_ADDR		2
+#define LUT_DUMMY		3
+#define LUT_MODE		4
+#define LUT_MODE2		5
+#define LUT_MODE4		6
+#define LUT_FSL_READ		7
+#define LUT_FSL_WRITE		8
+#define LUT_JMP_ON_CS		9
+#define LUT_ADDR_DDR		10
+#define LUT_MODE_DDR		11
+#define LUT_MODE2_DDR		12
+#define LUT_MODE4_DDR		13
+#define LUT_FSL_READ_DDR	14
+#define LUT_FSL_WRITE_DDR	15
+#define LUT_DATA_LEARN		16
+
+/*
+ * The PAD definitions for LUT register.
+ *
+ * The pad stands for the number of IO lines [0:3].
+ * For example, the quad read needs four IO lines,
+ * so you should use LUT_PAD(4).
+ *
+ * The macro converts a line count into the value stored in the PAD
+ * field: fls(x) - 1 maps 1 -> 0, 2 -> 1 and 4 -> 2.
+ */
+#define LUT_PAD(x) (fls(x) - 1)
+
+/*
+ * Macro for constructing the LUT entries with the following
+ * register layout:
+ *
+ *  ---------------------------------------------------
+ *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
+ *  ---------------------------------------------------
+ *
+ * Each 32-bit LUT register holds two 16-bit instructions. Within one
+ * instruction the operand starts at bit 0, the pad at bit 8 and the
+ * instruction code at bit 10. An even idx lands in the lower half of
+ * the register, an odd idx is shifted into the upper half, so the
+ * caller ORs the result into register number idx / 2.
+ */
+#define LUT_DEF(idx, ins, pad, opr)					\
+	((((ins) << 10) | ((pad) << 8) | (opr)) << (((idx) % 2) * 16))
+
+/* Controller needs driver to swap endianness */
+#define QUADSPI_QUIRK_SWAP_ENDIAN	BIT(0)
+
+/* Controller needs 4x internal clock */
+#define QUADSPI_QUIRK_4X_INT_CLK	BIT(1)
+
+/*
+ * TKT253890, the controller needs the driver to fill the txfifo with
+ * 16 bytes at least to trigger a data transfer, even though the extra
+ * data won't be transferred.
+ */
+#define QUADSPI_QUIRK_TKT253890		BIT(2)
+
+/* TKT245618, the controller cannot wake up from wait mode */
+#define QUADSPI_QUIRK_TKT245618		BIT(3)
+
+/*
+ * Controller variants known to the driver. Each value is stored in the
+ * .devtype field of the matching fsl_qspi_devtype_data table.
+ */
+enum fsl_qspi_devtype {
+	FSL_QUADSPI_VYBRID,
+	FSL_QUADSPI_IMX6SX,
+	FSL_QUADSPI_IMX7D,
+	FSL_QUADSPI_IMX6UL,
+	FSL_QUADSPI_LS1021A,
+	FSL_QUADSPI_LS2080A,
+};
+
+/* Per-variant parameters, selected through the OF match table. */
+struct fsl_qspi_devtype_data {
+	/* which controller variant this table describes */
+	enum fsl_qspi_devtype devtype;
+	/* RX FIFO size in bytes; bounds reads done through IP commands */
+	unsigned int rxfifo;
+	/* TX FIFO size in bytes; upper limit for one write operation */
+	unsigned int txfifo;
+	/* AHB buffer size in bytes; upper limit for one read operation */
+	unsigned int ahb_buf_size;
+	/* bitmask of QUADSPI_QUIRK_* flags */
+	unsigned int quirks;
+};
+
+/* Parameters for "fsl,vf610-qspi". */
+static const struct fsl_qspi_devtype_data vybrid_data = {
+	.devtype	= FSL_QUADSPI_VYBRID,
+	.rxfifo		= SZ_128,
+	.txfifo		= SZ_64,
+	.ahb_buf_size	= SZ_1K,
+	.quirks		= QUADSPI_QUIRK_SWAP_ENDIAN,
+};
+
+/* Parameters for "fsl,imx6sx-qspi". */
+static const struct fsl_qspi_devtype_data imx6sx_data = {
+	.devtype	= FSL_QUADSPI_IMX6SX,
+	.rxfifo		= SZ_128,
+	.txfifo		= SZ_512,
+	.ahb_buf_size	= SZ_1K,
+	.quirks		= QUADSPI_QUIRK_4X_INT_CLK | QUADSPI_QUIRK_TKT245618,
+};
+
+/* Parameters for "fsl,imx7d-qspi". */
+static const struct fsl_qspi_devtype_data imx7d_data = {
+	.devtype	= FSL_QUADSPI_IMX7D,
+	.rxfifo		= SZ_512,
+	.txfifo		= SZ_512,
+	.ahb_buf_size	= SZ_1K,
+	.quirks		= QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK,
+};
+
+/* Parameters for "fsl,imx6ul-qspi". */
+static const struct fsl_qspi_devtype_data imx6ul_data = {
+	.devtype	= FSL_QUADSPI_IMX6UL,
+	.rxfifo		= SZ_128,
+	.txfifo		= SZ_512,
+	.ahb_buf_size	= SZ_1K,
+	.quirks		= QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK,
+};
+
+/* Parameters for "fsl,ls1021a-qspi". */
+static const struct fsl_qspi_devtype_data ls1021a_data = {
+	.devtype	= FSL_QUADSPI_LS1021A,
+	.rxfifo		= SZ_128,
+	.txfifo		= SZ_64,
+	.ahb_buf_size	= SZ_1K,
+	.quirks		= 0,
+};
+
+/* Parameters for "fsl,ls2080a-qspi". */
+static const struct fsl_qspi_devtype_data ls2080a_data = {
+	.devtype	= FSL_QUADSPI_LS2080A,
+	.rxfifo		= SZ_128,
+	.txfifo		= SZ_64,
+	.ahb_buf_size	= SZ_1K,
+	.quirks		= QUADSPI_QUIRK_TKT253890,
+};
+
+/* Driver state, allocated as the SPI controller's devdata. */
+struct fsl_qspi {
+	/* mapping of the "QuadSPI" register resource */
+	void __iomem *iobase;
+	/* mapping of the "QuadSPI-memory" resource, used for AHB reads */
+	void __iomem *ahb_addr;
+	/* start address of the "QuadSPI-memory" resource */
+	u32 memmap_phy;
+	/* the "qspi" and "qspi_en" clocks */
+	struct clk *clk, *clk_en;
+	/* the platform device's struct device */
+	struct device *dev;
+	/* completed by the IRQ handler when QUADSPI_FR_TFF_MASK is set */
+	struct completion c;
+	/* parameters of the matched controller variant */
+	const struct fsl_qspi_devtype_data *devtype_data;
+	/* set from the "big-endian" DT property; picks the register accessors */
+	bool big_endian;
+	/* held for the whole duration of fsl_qspi_exec_op() */
+	struct mutex lock;
+	/* PM QoS request, only used with QUADSPI_QUIRK_TKT245618 */
+	struct pm_qos_request pm_qos_req;
+	/* chip select last set up by fsl_qspi_select_mem(), -1 after setup */
+	int selected;
+};
+
+/* Non-zero if the controller variant has QUADSPI_QUIRK_SWAP_ENDIAN set. */
+static inline int needs_swap_endian(struct fsl_qspi *q)
+{
+	const struct fsl_qspi_devtype_data *data = q->devtype_data;
+
+	return data->quirks & QUADSPI_QUIRK_SWAP_ENDIAN;
+}
+
+/* Non-zero if the controller variant has QUADSPI_QUIRK_4X_INT_CLK set. */
+static inline int needs_4x_clock(struct fsl_qspi *q)
+{
+	const struct fsl_qspi_devtype_data *data = q->devtype_data;
+
+	return data->quirks & QUADSPI_QUIRK_4X_INT_CLK;
+}
+
+/* Non-zero if the controller variant has QUADSPI_QUIRK_TKT253890 set. */
+static inline int needs_fill_txfifo(struct fsl_qspi *q)
+{
+	const struct fsl_qspi_devtype_data *data = q->devtype_data;
+
+	return data->quirks & QUADSPI_QUIRK_TKT253890;
+}
+
+/* Non-zero if the controller variant has QUADSPI_QUIRK_TKT245618 set. */
+static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
+{
+	const struct fsl_qspi_devtype_data *data = q->devtype_data;
+
+	return data->quirks & QUADSPI_QUIRK_TKT245618;
+}
+
+/*
+ * Controllers with the swap-endian quirk need every 32-bit data word
+ * byte-swapped by the driver; on all other variants the word is
+ * passed through unchanged.
+ */
+static inline u32 fsl_qspi_endian_xchg(struct fsl_qspi *q, u32 a)
+{
+	if (!needs_swap_endian(q))
+		return a;
+
+	return __swab32(a);
+}
+
+/*
+ * Register accessors for big- or little-endian controllers:
+ * the endianness of the QSPI register block does not depend on the
+ * endianness of the CPU core, so the access width is fixed to 32 bit
+ * and the byte order is chosen at run time from q->big_endian.
+ */
+static void qspi_writel(struct fsl_qspi *q, u32 val, void __iomem *addr)
+{
+	if (!q->big_endian) {
+		iowrite32(val, addr);
+		return;
+	}
+
+	iowrite32be(val, addr);
+}
+
+/* Read one 32-bit register in the byte order given by q->big_endian. */
+static u32 qspi_readl(struct fsl_qspi *q, void __iomem *addr)
+{
+	return q->big_endian ? ioread32be(addr) : ioread32(addr);
+}
+
+/*
+ * Interrupt handler: acknowledge all pending flags and wake up the
+ * waiter in fsl_qspi_do_op() once the TFF flag has been seen.
+ */
+static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id)
+{
+	struct fsl_qspi *q = dev_id;
+	void __iomem *fr = q->iobase + QUADSPI_FR;
+	u32 flags = qspi_readl(q, fr);
+
+	/* writing the flags back clears the interrupt */
+	qspi_writel(q, flags, fr);
+
+	if (flags & QUADSPI_FR_TFF_MASK)
+		complete(&q->c);
+
+	dev_dbg(q->dev, "QUADSPI_FR : 0x%.8x:0x%.8x\n", 0, flags);
+	return IRQ_HANDLED;
+}
+
+/* Return 0 for a bus width of 1, 2 or 4 lines, -ENOTSUPP otherwise. */
+static int fsl_qspi_check_buswidth(struct fsl_qspi *q, u8 width)
+{
+	if (width == 1 || width == 2 || width == 4)
+		return 0;
+
+	return -ENOTSUPP;
+}
+
+/*
+ * Tell the spi-mem layer whether the controller can execute @op.
+ *
+ * An operation is accepted when
+ *  - every phase that is present uses 1, 2 or 4 IO lines,
+ *  - the address bytes plus the optional dummy and data instructions
+ *    fit into the LUT sequence next to the command and stop entries,
+ *  - the dummy phase is at most 64 clock cycles long, and
+ *  - the data length respects the FIFO/AHB buffer limits of the
+ *    controller variant.
+ *
+ * Returns true if the operation is supported, false otherwise.
+ */
+static bool fsl_qspi_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	/* only zero/non-zero matters, so the error codes can be ORed */
+	ret = fsl_qspi_check_buswidth(q, op->cmd.buswidth);
+
+	if (op->addr.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->addr.buswidth);
+
+	if (op->dummy.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->dummy.buswidth);
+
+	if (op->data.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->data.buswidth);
+
+	if (ret)
+		return false;
+
+	/*
+	 * The number of instructions needed for the op, needs
+	 * to fit into a single LUT entry.
+	 */
+	if (op->addr.nbytes +
+	   (op->dummy.nbytes ? 1:0) +
+	   (op->data.nbytes ? 1:0) > 6)
+		return false;
+
+	/*
+	 * Max 64 dummy clock cycles supported. The bus width of the dummy
+	 * phase has only been validated when the phase is present, so the
+	 * division must be skipped otherwise to avoid dividing by zero.
+	 */
+	if (op->dummy.nbytes &&
+	    op->dummy.nbytes * 8 / op->dummy.buswidth > 64)
+		return false;
+
+	/* Max data length, check controller limits and alignment */
+	if (op->data.dir == SPI_MEM_DATA_IN &&
+	    (op->data.nbytes > q->devtype_data->ahb_buf_size ||
+	     (op->data.nbytes > q->devtype_data->rxfifo - 4 &&
+	      !IS_ALIGNED(op->data.nbytes, 8))))
+		return false;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT &&
+	    op->data.nbytes > q->devtype_data->txfifo)
+		return false;
+
+	return true;
+}
+
+/*
+ * Translate @op into a LUT sequence and write it to the LUT registers
+ * of sequence SEQID_LUT. The sequence is: command, one LUT_MODE
+ * instruction per address byte (most significant byte first), an
+ * optional dummy instruction, an optional read/write instruction and
+ * a final stop instruction.
+ */
+static void fsl_qspi_prepare_lut(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u32 lutval[4] = {};
+	int slot = 0, i;
+
+	lutval[slot / 2] |= LUT_DEF(slot, LUT_CMD, LUT_PAD(op->cmd.buswidth),
+				    op->cmd.opcode);
+	slot++;
+
+	/*
+	 * For some unknown reason, using LUT_ADDR doesn't work in some
+	 * cases (at least with only one byte long addresses), so
+	 * let's use LUT_MODE to write the address bytes one by one
+	 */
+	for (i = op->addr.nbytes; i > 0; i--) {
+		u8 addrbyte = op->addr.val >> (8 * (i - 1));
+
+		lutval[slot / 2] |= LUT_DEF(slot, LUT_MODE,
+					    LUT_PAD(op->addr.buswidth),
+					    addrbyte);
+		slot++;
+	}
+
+	if (op->dummy.nbytes) {
+		/* the operand is the number of dummy clock cycles */
+		lutval[slot / 2] |= LUT_DEF(slot, LUT_DUMMY,
+					    LUT_PAD(op->dummy.buswidth),
+					    op->dummy.nbytes * 8 /
+					    op->dummy.buswidth);
+		slot++;
+	}
+
+	if (op->data.nbytes) {
+		int data_ins = LUT_FSL_WRITE;
+
+		if (op->data.dir == SPI_MEM_DATA_IN)
+			data_ins = LUT_FSL_READ;
+
+		lutval[slot / 2] |= LUT_DEF(slot, data_ins,
+					    LUT_PAD(op->data.buswidth), 0);
+		slot++;
+	}
+
+	lutval[slot / 2] |= LUT_DEF(slot, LUT_STOP, 0, 0);
+
+	/* unlock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, base + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_UNLOCK, base + QUADSPI_LCKCR);
+
+	/* fill LUT */
+	for (i = 0; i < ARRAY_SIZE(lutval); i++)
+		qspi_writel(q, lutval[i], base + QUADSPI_LUT_REG(i));
+
+	/* lock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, base + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_LOCK, base + QUADSPI_LCKCR);
+}
+
+/*
+ * Prepare and enable both controller clocks, "qspi_en" first. On
+ * variants with the TKT245618 quirk a PM QoS request with a latency
+ * of 0 is added as well. Returns 0 on success; on failure no clock is
+ * left enabled and the error code is returned.
+ */
+static int fsl_qspi_clk_prep_enable(struct fsl_qspi *q)
+{
+	int ret;
+
+	ret = clk_prepare_enable(q->clk_en);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(q->clk);
+	if (ret)
+		goto err_disable_clk_en;
+
+	if (needs_wakeup_wait_mode(q))
+		pm_qos_add_request(&q->pm_qos_req, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	return 0;
+
+err_disable_clk_en:
+	clk_disable_unprepare(q->clk_en);
+	return ret;
+}
+
+/*
+ * Counterpart of fsl_qspi_clk_prep_enable(): drop the PM QoS request
+ * (TKT245618 variants only), then disable and unprepare both clocks
+ * in reverse order of enabling.
+ */
+static void fsl_qspi_clk_disable_unprep(struct fsl_qspi *q)
+{
+	if (needs_wakeup_wait_mode(q))
+		pm_qos_remove_request(&q->pm_qos_req);
+
+	clk_disable_unprepare(q->clk);
+	clk_disable_unprepare(q->clk_en);
+}
+
+/*
+ * Make the flash behind @spi the target of the following operations.
+ *
+ * Nothing is done if the chip select is already the one recorded in
+ * q->selected. Otherwise the four SFxxAD registers are reprogrammed
+ * and the "qspi" clock is switched to spi->max_speed_hz (times four on
+ * variants with the 4x clock quirk).
+ *
+ * NOTE(review): the function returns void, so a failure of
+ * clk_set_rate() or fsl_qspi_clk_prep_enable() is not reported to the
+ * caller. In both cases the clocks have already been disabled and
+ * q->selected keeps its old value.
+ */
+static void fsl_qspi_select_mem(struct fsl_qspi *q, struct spi_device 
+*spi) {
+	unsigned long rate = spi->max_speed_hz;
+	int ret, i;
+	u32 map_addr;
+
+	if (q->selected == spi->chip_select)
+		return;
+
+	/*
+	 * In HW there can be a maximum of four chips on two buses with
+	 * two chip selects on each bus. We use four chip selects in SW
+	 * to differentiate between the four chips.
+	 * We use the SFA1AD, SFA2AD, SFB1AD, SFB2AD registers to select
+	 * the chip we want to access.
+	 */
+	/*
+	 * Registers below the selected index get the start of the mapped
+	 * region, the selected one and all following get the start plus
+	 * two AHB buffers (fsl_qspi_read_ahb() alternates between two
+	 * buffer-sized windows). Presumably these registers hold the top
+	 * address of each chip, so that only the selected chip ends up
+	 * with a non-empty window - TODO confirm against the reference
+	 * manual.
+	 */
+	for (i = 0; i < 4; i++) {
+		if (i < spi->chip_select)
+			map_addr = q->memmap_phy;
+		else
+			map_addr = q->memmap_phy +
+				   2 * q->devtype_data->ahb_buf_size;
+
+		qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
+	}
+
+	if (needs_4x_clock(q))
+		rate *= 4;
+
+	/* the clocks are gated while the rate is changed */
+	fsl_qspi_clk_disable_unprep(q);
+
+	ret = clk_set_rate(q->clk, rate);
+	if (ret)
+		return;
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret)
+		return;
+
+	/* only remember the chip once the clock switch has succeeded */
+	q->selected = spi->chip_select;
+}
+
+/*
+ * Read op->data.nbytes bytes through the memory-mapped AHB window into
+ * op->data.buf.in.
+ */
+static void fsl_qspi_read_ahb(struct fsl_qspi *q, const struct spi_mem_op *op)
+{
+	static int seq;
+	void __iomem *window;
+
+	/*
+	 * Invalidating the AHB buffer would require a reset of the AHB
+	 * and Serial Flash domain, which takes time. Instead the read
+	 * address alternates between two buffer-sized windows, so that
+	 * every call triggers an actual read operation on the flash.
+	 * The address used on the flash itself is set by programming
+	 * the LUT.
+	 */
+	window = q->ahb_addr + (seq * q->devtype_data->ahb_buf_size);
+
+	memcpy_fromio(op->data.buf.in, window, op->data.nbytes);
+
+	seq = !seq;
+}
+
+/*
+ * Push the output data of @op into the TX buffer, one 32-bit word at
+ * a time. A trailing partial word is padded with zero bytes. Variants
+ * with the TKT253890 quirk additionally get zero words until at least
+ * 16 bytes have been written.
+ */
+static void fsl_qspi_fill_txfifo(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *tbdr = q->iobase + QUADSPI_TBDR;
+	int i;
+
+	for (i = 0; i < op->data.nbytes; i += 4) {
+		unsigned int chunk = min_t(unsigned int,
+					   op->data.nbytes - i, 4);
+		u32 word = 0;
+
+		memcpy(&word, op->data.buf.out + i, chunk);
+
+		word = fsl_qspi_endian_xchg(q, word);
+		qspi_writel(q, word, tbdr);
+	}
+
+	if (!needs_fill_txfifo(q))
+		return;
+
+	while (i < 16) {
+		qspi_writel(q, 0, tbdr);
+		i += 4;
+	}
+}
+
+/*
+ * Copy op->data.nbytes bytes from the RX buffer data registers into
+ * op->data.buf.in. Data is fetched in 32-bit words; of the last word
+ * only the bytes that are still missing are copied.
+ */
+static void fsl_qspi_read_rxfifo(struct fsl_qspi *q,
+			  const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u8 *dst = op->data.buf.in;
+	int i;
+
+	for (i = 0; i < op->data.nbytes; i += 4) {
+		unsigned int chunk = min_t(unsigned int,
+					   op->data.nbytes - i, 4);
+		u32 word;
+
+		word = qspi_readl(q, base + QUADSPI_RBDR(i / 4));
+		word = fsl_qspi_endian_xchg(q, word);
+
+		memcpy(dst + i, &word, chunk);
+	}
+}
+
+/*
+ * Run the prepared LUT sequence as an IP command and wait up to one
+ * second for the completion interrupt. For input operations the
+ * received data is then copied out of the RX buffer.
+ *
+ * Returns 0 on success or -ETIMEDOUT if no interrupt arrived in time.
+ */
+static int fsl_qspi_do_op(struct fsl_qspi *q, const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u32 ipcr;
+
+	init_completion(&q->c);
+
+	/*
+	 * The sequence always starts at the same index, because the LUT
+	 * is rewritten on each exec_op() call. The data length goes
+	 * into the same register, since it has not been specified in
+	 * the LUT.
+	 */
+	ipcr = op->data.nbytes | (SEQID_LUT << QUADSPI_IPCR_SEQID_SHIFT);
+	qspi_writel(q, ipcr, base + QUADSPI_IPCR);
+
+	/* Wait for the interrupt. */
+	if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000)))
+		return -ETIMEDOUT;
+
+	if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
+		fsl_qspi_read_rxfifo(q, op);
+
+	return 0;
+}
+
+/*
+ * spi-mem ->exec_op() hook: execute one flash operation.
+ *
+ * The whole operation runs under q->lock. After the controller has
+ * become idle, the target chip is selected, the FIFOs and buffer
+ * pointers are cleared and the LUT is rewritten for @op. The data
+ * phase then goes either through the AHB window or through an IP
+ * command.
+ *
+ * Returns 0 on success or the error code of fsl_qspi_do_op().
+ */
+static int fsl_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	void __iomem *base = q->iobase;
+	bool ahb_read;
+	int err = 0;
+	u32 mcr;
+
+	mutex_lock(&q->lock);
+
+	/* wait for the controller being ready */
+	for (;;) {
+		u32 status = qspi_readl(q, base + QUADSPI_SR);
+
+		if (!(status &
+		      (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)))
+			break;
+
+		udelay(1);
+		dev_dbg(q->dev, "The controller is busy, 0x%x\n", status);
+	}
+
+	fsl_qspi_select_mem(q, mem->spi);
+
+	qspi_writel(q, q->memmap_phy, base + QUADSPI_SFAR);
+
+	/* clear both FIFOs */
+	mcr = qspi_readl(q, base + QUADSPI_MCR);
+	mcr |= QUADSPI_MCR_CLR_RXF_MASK | QUADSPI_MCR_CLR_TXF_MASK;
+	qspi_writel(q, mcr, base + QUADSPI_MCR);
+
+	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC | QUADSPI_SPTRCLR_IPPTRC,
+		    base + QUADSPI_SPTRCLR);
+
+	fsl_qspi_prepare_lut(q, op);
+
+	/*
+	 * Large chunks of input data are read through the AHB bus by
+	 * accessing the mapped memory. Everything else is done with
+	 * IP commands.
+	 */
+	ahb_read = op->data.nbytes > (q->devtype_data->rxfifo - 4) &&
+		   op->data.dir == SPI_MEM_DATA_IN;
+
+	if (ahb_read) {
+		fsl_qspi_read_ahb(q, op);
+	} else {
+		qspi_writel(q,
+			    QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
+			    base + QUADSPI_RBCT);
+
+		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+			fsl_qspi_fill_txfifo(q, op);
+
+		err = fsl_qspi_do_op(q, op);
+	}
+
+	mutex_unlock(&q->lock);
+
+	return err;
+}
+
+/*
+ * spi-mem ->adjust_op_size() hook: shrink op->data.nbytes to what the
+ * controller can transfer in one operation.
+ *
+ * Writes are capped at the TX FIFO size. All other operations are
+ * capped at the AHB buffer size; below that cap, a length that does
+ * not fit the RX FIFO (minus 4 bytes) is rounded down to a multiple
+ * of 8. Always returns 0.
+ */
+static int fsl_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	const struct fsl_qspi_devtype_data *data = q->devtype_data;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		if (op->data.nbytes > data->txfifo)
+			op->data.nbytes = data->txfifo;
+
+		return 0;
+	}
+
+	if (op->data.nbytes > data->ahb_buf_size)
+		op->data.nbytes = data->ahb_buf_size;
+	else if (op->data.nbytes > (data->rxfifo - 4))
+		op->data.nbytes = ALIGN_DOWN(op->data.nbytes, 8);
+
+	return 0;
+}
+
+/*
+ * Bring the controller into its default state.
+ *
+ * The clocks are restarted at a default rate of 66 MHz, the module is
+ * reset and disabled, the sampling, AHB buffer and RX buffer registers
+ * are programmed, and finally the module and its TFF interrupt are
+ * enabled again. The clocks are expected to be running on entry,
+ * because they are disabled first.
+ *
+ * Returns 0 on success or the error code of clk_set_rate() or
+ * fsl_qspi_clk_prep_enable(); in both error cases the clocks are left
+ * disabled.
+ */
+static int fsl_qspi_default_setup(struct fsl_qspi *q) {
+	void __iomem *base = q->iobase;
+	u32 reg;
+	int ret;
+
+	/* disable and unprepare clock to avoid glitch pass to controller */
+	fsl_qspi_clk_disable_unprep(q);
+
+	/* the default frequency, we will change it later if necessary. */
+	ret = clk_set_rate(q->clk, 66000000);
+	if (ret)
+		return ret;
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret)
+		return ret;
+
+	/* Reset the module */
+	qspi_writel(q, QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
+		base + QUADSPI_MCR);
+	udelay(1);
+
+	/* Disable the module */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
+			base + QUADSPI_MCR);
+
+	/* clear the sampling options, all other SMPR bits are kept */
+	reg = qspi_readl(q, base + QUADSPI_SMPR);
+	qspi_writel(q, reg & ~(QUADSPI_SMPR_FSDLY_MASK
+			| QUADSPI_SMPR_FSPHS_MASK
+			| QUADSPI_SMPR_HSENA_MASK
+			| QUADSPI_SMPR_DDRSMP_MASK), base + QUADSPI_SMPR);
+
+	/* We only use the buffer3 for AHB read */
+	qspi_writel(q, 0, base + QUADSPI_BUF0IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
+
+	/* AHB reads run the same LUT sequence as the IP commands */
+	qspi_writel(q, SEQID_LUT << QUADSPI_BFGENCR_SEQID_SHIFT,
+		    q->iobase + QUADSPI_BFGENCR);
+	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, base + QUADSPI_RBCT);
+	/* presumably ADATSZ counts in units of 8 bytes - TODO confirm */
+	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
+		    ((q->devtype_data->ahb_buf_size / 8) <<
+		    QUADSPI_BUF3CR_ADATSZ_SHIFT),
+		    base + QUADSPI_BUF3CR);
+
+	/* no chip is set up yet, the next fsl_qspi_select_mem() does it */
+	q->selected = -1;
+
+	/* Enable the module */
+	qspi_writel(q, QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
+			base + QUADSPI_MCR);
+
+	/* clear all interrupt status */
+	qspi_writel(q, 0xffffffff, q->iobase + QUADSPI_FR);
+
+	/* enable the interrupt */
+	qspi_writel(q, QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
+
+	return 0;
+}
+
+/*
+ * spi-mem ->get_name() hook.
+ *
+ * To keep mtdparts compatible with the old MTD driver at
+ * mtd/spi-nor/fsl-quadspi.c, the name is derived from the
+ * platform_device of the controller: the plain device name if the
+ * controller node has exactly one available child, otherwise the
+ * device name followed by "-<chip select>". If no name could be
+ * obtained, an error is logged and the plain device name is returned.
+ */
+static const char *fsl_qspi_get_name(struct spi_mem *mem)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct device *dev = &mem->spi->dev;
+	bool single = of_get_available_child_count(q->dev->of_node) == 1;
+	const char *name;
+
+	name = single ? dev_name(q->dev) :
+			devm_kasprintf(dev, GFP_KERNEL,
+				       "%s-%d", dev_name(q->dev),
+				       mem->spi->chip_select);
+	if (name)
+		return name;
+
+	dev_err(dev, "failed to get memory for custom flash name\n");
+	return dev_name(q->dev);
+}
+
+/* SPI memory operations, installed as ctlr->mem_ops in probe. */
+static const struct spi_controller_mem_ops fsl_qspi_mem_ops = {
+	.supports_op	= fsl_qspi_supports_op,
+	.adjust_op_size	= fsl_qspi_adjust_op_size,
+	.exec_op	= fsl_qspi_exec_op,
+	.get_name	= fsl_qspi_get_name,
+};
+
+/*
+ * Bind the driver to a QuadSPI platform device.
+ *
+ * Allocates the SPI controller with the driver state as devdata, maps
+ * the "QuadSPI" register and "QuadSPI-memory" AHB resources, gets and
+ * enables the "qspi_en" and "qspi" clocks, requests the interrupt,
+ * initialises the hardware and registers the controller.
+ *
+ * Returns 0 on success or a negative error code. On failure everything
+ * acquired without devres is released again.
+ */
+static int fsl_qspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	struct fsl_qspi *q;
+	int ret;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*q));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD |
+			  SPI_TX_DUAL | SPI_TX_QUAD;
+
+	q = spi_controller_get_devdata(ctlr);
+	q->dev = dev;
+	q->devtype_data = of_device_get_match_data(dev);
+	if (!q->devtype_data) {
+		ret = -ENODEV;
+		goto err_put_ctrl;
+	}
+
+	platform_set_drvdata(pdev, q);
+
+	/* find the resources */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "QuadSPI");
+	q->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->iobase)) {
+		ret = PTR_ERR(q->iobase);
+		goto err_put_ctrl;
+	}
+
+	q->big_endian = of_property_read_bool(np, "big-endian");
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					"QuadSPI-memory");
+	q->ahb_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->ahb_addr)) {
+		ret = PTR_ERR(q->ahb_addr);
+		goto err_put_ctrl;
+	}
+
+	/* res is only dereferenced after the mapping has succeeded */
+	q->memmap_phy = res->start;
+
+	/* find the clocks */
+	q->clk_en = devm_clk_get(dev, "qspi_en");
+	if (IS_ERR(q->clk_en)) {
+		ret = PTR_ERR(q->clk_en);
+		goto err_put_ctrl;
+	}
+
+	q->clk = devm_clk_get(dev, "qspi");
+	if (IS_ERR(q->clk)) {
+		ret = PTR_ERR(q->clk);
+		goto err_put_ctrl;
+	}
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		goto err_put_ctrl;
+	}
+
+	/* find the irq */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "failed to get the irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	ret = devm_request_irq(dev, ret,
+			fsl_qspi_irq_handler, 0, pdev->name, q);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	mutex_init(&q->lock);
+
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = 4;
+	ctlr->mem_ops = &fsl_qspi_mem_ops;
+
+	ret = fsl_qspi_default_setup(q);
+	if (ret) {
+		dev_err(dev, "failed to set up the controller: %d\n", ret);
+		/*
+		 * fsl_qspi_default_setup() only fails after it has
+		 * disabled the clocks again, so the clock cleanup at
+		 * err_disable_clk must be skipped here.
+		 */
+		mutex_destroy(&q->lock);
+		goto err_put_ctrl;
+	}
+
+	ctlr->dev.of_node = np;
+
+	ret = spi_register_controller(ctlr);
+	if (ret)
+		goto err_destroy_mutex;
+
+	return 0;
+
+err_destroy_mutex:
+	mutex_destroy(&q->lock);
+
+err_disable_clk:
+	fsl_qspi_clk_disable_unprep(q);
+
+err_put_ctrl:
+	spi_controller_put(ctlr);
+
+	dev_err(dev, "Freescale QuadSPI probe failed\n");
+	return ret;
+}
+
+/*
+ * Unbind the driver: disable the module and its interrupt sources,
+ * stop the clocks and destroy the lock. Always returns 0.
+ *
+ * NOTE(review): the controller registered with
+ * spi_register_controller() in probe is not unregistered here -
+ * confirm whether this is handled elsewhere.
+ */
+static int fsl_qspi_remove(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+
+	/* disable the hardware */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER);
+
+	fsl_qspi_clk_disable_unprep(q);
+
+	mutex_destroy(&q->lock);
+
+	/*
+	 * q->ahb_addr was mapped with devm_ioremap_resource() in probe,
+	 * so it is unmapped automatically when the device is released.
+	 * Calling iounmap() on it here would unmap it twice.
+	 */
+
+	return 0;
+}
+
+/* Legacy platform suspend hook; there is nothing to do, always returns 0. */
+static int fsl_qspi_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	return 0;
+}
+
+/*
+ * Legacy platform resume hook: reprogram the controller with the
+ * default setup. Returns 0 on success or the error code of
+ * fsl_qspi_default_setup(), so that a failed clock setup is reported
+ * instead of being silently ignored.
+ */
+static int fsl_qspi_resume(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+
+	return fsl_qspi_default_setup(q);
+}
+
+/* Supported compatibles; .data points to the per-variant parameters. */
+static const struct of_device_id fsl_qspi_dt_ids[] = {
+	{ .compatible = "fsl,vf610-qspi",   .data = &vybrid_data },
+	{ .compatible = "fsl,imx6sx-qspi",  .data = &imx6sx_data },
+	{ .compatible = "fsl,imx7d-qspi",   .data = &imx7d_data },
+	{ .compatible = "fsl,imx6ul-qspi",  .data = &imx6ul_data },
+	{ .compatible = "fsl,ls1021a-qspi", .data = &ls1021a_data },
+	{ .compatible = "fsl,ls2080a-qspi", .data = &ls2080a_data },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_qspi_dt_ids);
+
+/* Platform driver glue; devices are matched through fsl_qspi_dt_ids. */
+static struct platform_driver fsl_qspi_driver = {
+	.probe		= fsl_qspi_probe,
+	.remove		= fsl_qspi_remove,
+	.suspend	= fsl_qspi_suspend,
+	.resume		= fsl_qspi_resume,
+	.driver		= {
+		.name		= "fsl-quadspi",
+		.of_match_table	= fsl_qspi_dt_ids,
+	},
+};
+module_platform_driver(fsl_qspi_driver);
+
+/*
+ * Module metadata, one macro per line: a string literal must not be
+ * split across source lines.
+ * NOTE(review): the SPDX tag at the top of the file says GPL-2.0+ -
+ * confirm that the licence string below matches the intent.
+ */
+MODULE_DESCRIPTION("Freescale QuadSPI Controller Driver");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@bootlin.com>");
+MODULE_AUTHOR("Frieder Schrempf <frieder.schrempf@exceet.de>");
+MODULE_LICENSE("GPL v2");
--
2.7.4
Boris Brezillon May 30, 2018, 2:24 p.m. | #2
Hi Yogesh,

On Wed, 30 May 2018 13:50:51 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Frieder,
> 
> Thanks for migrating the fsl-quadspi.c driver on the new SPI
> framework. This patch is using dynamic LUT approach to create the LUT
> at run time instead of fixed static LUT as being used in current
> driver present at mtd/spi-nor/fsl-quadspi.c. I have pushed the
> changes for dynamic LUT on mtd/spi-nor/fsl-quadspi.c and v10 has been
> in review stage.
> 
> Request you to please add 'signed-off' mentioned in those patches in
> this patch, patchwork link is
> https://patchwork.ozlabs.org/patch/896534/

First, I'd like to state that this work has not been based on your
dynamic LUT code, and I actually asked you to adapt your code to match
the way we were handling it in the new driver (which at that time was
still under development). Then, even if you want to be cited as one of
the authors of the new code, an SoB tag is not the right way to do it (see
[1] for an explanation of when SoB should be added). Instead, you
should add your name to the copyright header and maybe add a
MODULE_AUTHOR():

/*
 * Copyright ...
 * ...
 * Authors:
 *	...
 *	Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
 */

...

MODULE_AUTHOR("Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>");

Regards,

Boris

[1]https://elixir.bootlin.com/linux/latest/source/Documentation/process/submitting-patches.rst#L429
Boris Brezillon May 30, 2018, 2:58 p.m. | #3
Hi Frieder,

On Wed, 30 May 2018 15:14:32 +0200
Frieder Schrempf <frieder.schrempf@exceet.de> wrote:

> +
> +static const char *fsl_qspi_get_name(struct spi_mem *mem)
> +{
> +	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
> +	struct device *dev = &mem->spi->dev;
> +	const char *name;
> +
> +	/*
> +	 * In order to keep mtdparts compatible with the old MTD driver at
> +	 * mtd/spi-nor/fsl-quadspi.c, we set a custom name derived from the
> +	 * platform_device of the controller.
> +	 */
> +	if (of_get_available_child_count(q->dev->of_node) == 1)
> +		name = dev_name(q->dev);
> +	else
> +		name = devm_kasprintf(dev, GFP_KERNEL,
> +				      "%s-%d", dev_name(q->dev),
> +				      mem->spi->chip_select);
> +
> +	if (!name) {
> +		dev_err(dev, "failed to get memory for custom flash name\n");
> +		return dev_name(q->dev);

Hm, not sure that's what we want. We should probably fail when the
allocation fails.

How about letting ->get_name() return an error pointer or NULL in case
of error? With the other suggestion I made in my review of patch 1
(calling ->get_name() at probe time), you could refuse to probe the
device when ->get_name() fails.

> +	}
> +
> +	return name;
> +}
> +

Regards,

Boris
Frieder Schrempf May 30, 2018, 3:13 p.m. | #4
Hi Boris,

On 30.05.2018 16:58, Boris Brezillon wrote:
> Hi Frieder,
> 
> On Wed, 30 May 2018 15:14:32 +0200
> Frieder Schrempf <frieder.schrempf@exceet.de> wrote:
> 
>> +
>> +static const char *fsl_qspi_get_name(struct spi_mem *mem)
>> +{
>> +	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
>> +	struct device *dev = &mem->spi->dev;
>> +	const char *name;
>> +
>> +	/*
>> +	 * In order to keep mtdparts compatible with the old MTD driver at
>> +	 * mtd/spi-nor/fsl-quadspi.c, we set a custom name derived from the
>> +	 * platform_device of the controller.
>> +	 */
>> +	if (of_get_available_child_count(q->dev->of_node) == 1)
>> +		name = dev_name(q->dev);
>> +	else
>> +		name = devm_kasprintf(dev, GFP_KERNEL,
>> +				      "%s-%d", dev_name(q->dev),
>> +				      mem->spi->chip_select);
>> +
>> +	if (!name) {
>> +		dev_err(dev, "failed to get memory for custom flash name\n");
>> +		return dev_name(q->dev);
> 
> Hm, not sure that's what we want. We should probably fail when the
> allocation fails.

Right, we should definitely fail when the allocation fails.

> 
> How about letting ->get_name() return an error pointer or NULL in case
> of error? With the other suggestion I made in my review of patch 1
> (calling ->get_name() at probe time), you could refuse to probe the
> device when ->get_name() fails.

Ok, I will change that.

Thanks,

Frieder

> 
>> +	}
>> +
>> +	return name;
>> +}
>> +
> 
> Regards,
> 
> Boris
>
Frieder Schrempf June 1, 2018, 9:14 a.m. | #5
Hi Yogesh,

On 30.05.2018 16:24, Boris Brezillon wrote:
> Hi Yogesh,
> 
> On Wed, 30 May 2018 13:50:51 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
>> Hi Frieder,
>>
>> Thanks for migrating the fsl-quadspi.c driver on the new SPI
>> framework. This patch is using dynamic LUT approach to create the LUT
>> at run time instead of fixed static LUT as being used in current
>> driver present at mtd/spi-nor/fsl-quadspi.c. I have pushed the
>> changes for dynamic LUT on mtd/spi-nor/fsl-quadspi.c and v10 has been
>> in review stage.
>>
>> Request you to please add 'signed-off' mentioned in those patches in
>> this patch, patchwork link is
>> https://patchwork.ozlabs.org/patch/896534/

So for reasons already given by Boris, I won't add your S-o-b tags. But 
I can add your name (and that of Suresh Gupta?) to the file header and 
as MODULE_AUTHOR in the next version.

Regards,

Frieder

> 
> First, I'd like to state that this work has not been based on your
> dynamic LUT code, and I actually asked you to adapt your code to match
> the way we were handling it in the new driver (which at that time was
> still under development). Then, even if you want to be cited as one of
> the authors of the new code, a SoB tag is not the right way to do it (see
> [1] for an explanation of when a SoB should be added). Instead, you
> should add your name to the copyright header and maybe add a
> MODULE_AUTHOR():
> 
> /*
>   * Copyright ...
>   * ...
>   * Authors:
>   *	...
>   *	Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
>   */
> 
> ...
> 
> MODULE_AUTHOR("Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>");
> 
> Regards,
> 
> Boris
> 
> [1]https://elixir.bootlin.com/linux/latest/source/Documentation/process/submitting-patches.rst#L429
>
Boris Brezillon June 5, 2018, 3 p.m. | #6
On Wed, 30 May 2018 15:14:32 +0200
Frieder Schrempf <frieder.schrempf@exceet.de> wrote:

> +
> +static void fsl_qspi_read_ahb(struct fsl_qspi *q, const struct spi_mem_op *op)
> +{
> +	static int seq;
> +
> +	/*
> +	 * We want to avoid needing to invalidate the cache by issueing
> +	 * a reset to the AHB and Serial Flash domain, as this needs
> +	 * time. So we change the address on each read to trigger an
> +	 * actual read operation on the flash. The actual address for
> +	 * the flash memory is set by programming the LUT.
> +	 */
> +	memcpy_fromio(op->data.buf.in,
> +		      q->ahb_addr +
> +		      (seq * q->devtype_data->ahb_buf_size),
> +		      op->data.nbytes);
> +
> +	seq = seq ? 0 : 1;

We should get rid of this hack. Yogesh, Han, do you know if there's an
easy way to invalidate the AHB buffer without resetting the IP?

> +}
Yogesh Narayan Gaur June 8, 2018, 11:54 a.m. | #7
Hi Frieder,

I have tried to validate your patch on an fsl,ls2080a target that has 2 Spansion NOR flashes (S25FS512S) connected as slave devices.
Below are my observations:

Observation 1:
In the Linux boot log, after the driver has probed successfully, I get the following messages:
[    1.435986] m25p80 spi0.0: found s25fl512s, expected m25p80
[    1.441564] m25p80 spi0.0: s25fl512s (65536 Kbytes)
[    1.446972] m25p80 spi0.1: found s25fl512s, expected m25p80
[    1.452548] m25p80 spi0.1: s25fl512s (65536 Kbytes)

IMHO, we need to correct the message 'found s25fl512s, expected m25p80', as the underlying connected flash device really is an s25fl512s.

Observation 2:
I have observed a data sanity issue after performing read/write operations using the MTD interface, as explained below:

root:~# mtd_debug erase /dev/mtd0 0x1000000 0x40000
Erased 262144 bytes from address 0x01000000 in flash                      --> Erase at address 0x1000000 of erase size 0x40000
root:~# mtd_debug read /dev/mtd0 0x0 0x100 rp
Copied 256 bytes from address 0x00000000 in flash to rp                   --> Read 0x100 bytes from flash from address 0x0 in file rp
root:~# mtd_debug write /dev/mtd0 0x1000000 0x100 rp
Copied 256 bytes from rp to address 0x01000000 in flash                   --> Write 0x100 bytes to flash address 0x1000000 from file rp
root:~# mtd_debug read /dev/mtd0 0x1000000 0x100 wp
Copied 256 bytes from address 0x01000000 in flash to wp                  --> Read 0x100 bytes from flash from address 0x1000000 in file wp
root:~# diff rp wp                                                                                           --> compare both rp and wp files, if they are different output comes on console stating file are different
Files rp and wp differ
root:~# hexdump wp
0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
0000010 0000 0000 0000 0000 0000 0000 0000 000a
0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
0000030 0000 0000 0040 0000 005b 0000 0000 0000
0000040 ffff ffff ffff ffff ffff ffff ffff ffff
*
0000100
root:~# hexdump rp
0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
0000010 0000 0000 0000 0000 0000 0000 0000 000a
0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
0000030 0000 0000 0040 0000 005b 0000 0000 0000
0000040 2403 0000 0000 0000 0000 0000 0000 0000
0000050 0000 0000 0000 0000 0000 0000 0000 0000
*
0000070 0011 0000 09e7 0000 0000 4411 9555 0050
0000080 0000 0000 0000 0000 f9bc afa1 0404 31e0
0000090 0000 0000 0400 31e0 0000 2010 08dc 31eb
00000a0 2880 0050 1300 31eb 4e20 8010 0000 80ff
00000b0 0000 0000 beef dead beef dead beef dead
00000c0 beef dead beef dead beef dead beef dead
*
0000100
root:~#

In the hexdump output of the file wp, which was read back from address 0x1000000, it can be observed that only the first 64 bytes (0x40) were written to the flash.

Observation 3:
Since the JFFS2 filesystem is supported on NOR flash, we can expect JFFS2 commands to work fine on NOR flash.
But with this driver change, my mount command is not working.

On my target there are 2 flash slave devices connected, and I have passed an argument to create MTD partitions for the 2nd flash, like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) ".
Below is output for /proc/mtd commands
    root@ls1012ardb:~# cat /proc/mtd
    dev:    size   erasesize  name
    mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
    mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
    mtd2: 00a00000 00040000 "test"
    mtd3: 02e00000 00040000 "rootfs"

    root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
    flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
    Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
    Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
    root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/

This command didn't finish successfully, and a lot of messages appear on the console reporting failures in jffs2_scan_eraseblock():
    [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0000: 0x2886 instead
    [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
    [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0008: 0xb10f instead

If I remove this patch series and check with older implementation, JFFS2 mounting is working fine.

Observation 4:
With the previous driver, we could read the content of the flash directly using the devmem command,
e.g. devmem 0x20000000  "Flash is connected at this Quad-SPI address"

But with the new driver, the devmem interface reports incorrect values.


Few other comments inline.

--
Regards,
Yogesh Gaur

-----Original Message-----
From: Frieder Schrempf [mailto:frieder.schrempf@exceet.de] 
Sent: Wednesday, May 30, 2018 6:45 PM
To: linux-mtd@lists.infradead.org; boris.brezillon@bootlin.com; linux-spi@vger.kernel.org
Cc: dwmw2@infradead.org; computersforpeace@gmail.com; marek.vasut@gmail.com; richard@nod.at; miquel.raynal@bootlin.com; broonie@kernel.org; David Wolfe <david.wolfe@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>; Han Xu <han.xu@nxp.com>; Frieder Schrempf <frieder.schrempf@exceet.de>; linux-kernel@vger.kernel.org
Subject: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

This driver is derived from the SPI NOR driver at mtd/spi-nor/fsl-quadspi.c. It uses the new SPI memory interface of the SPI framework to issue flash memory operations to up to four connected flash chips (2 buses with 2 CS each).

The controller does not support generic SPI messages.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
---
 drivers/spi/Kconfig        |  11 +
 drivers/spi/Makefile       |   1 +
 drivers/spi/spi-fsl-qspi.c | 929 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 941 insertions(+)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index e62ac32..6de0df5 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -251,6 +251,17 @@ config SPI_FSL_LPSPI
 	help
 	  This enables Freescale i.MX LPSPI controllers in master mode.
 
+config SPI_FSL_QSPI
+	tristate "Freescale QSPI controller"
+	depends on ARCH_MXC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
+	depends on HAS_IOMEM
+	help
+	  This enables support for the Quad SPI controller in master mode.
+	  Up to four flash chips can be connected on two buses with two
+	  chipselects each.
+	  This controller does not support generic SPI messages. It only
+	  supports the high-level SPI memory interface.
+
 config SPI_GPIO
 	tristate "GPIO-based bitbanging SPI Master"
 	depends on GPIOLIB || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index cb1f437..a8f7fda 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_SPI_FSL_DSPI)		+= spi-fsl-dspi.o
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
 obj-$(CONFIG_SPI_FSL_ESPI)		+= spi-fsl-espi.o
 obj-$(CONFIG_SPI_FSL_LPSPI)		+= spi-fsl-lpspi.o
+obj-$(CONFIG_SPI_FSL_QSPI)		+= spi-fsl-qspi.o
 obj-$(CONFIG_SPI_FSL_SPI)		+= spi-fsl-spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi-gpio.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c new file mode 100644 index 0000000..c16d070
--- /dev/null
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -0,0 +1,929 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Freescale QuadSPI driver.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ * Copyright (C) 2018 Bootlin
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ *
+ * Transition to SPI MEM interface:
+ * Author:
+ *     Boris Brezillon <boris.brezillon@bootlin.com>
+ *     Frieder Schrempf <frieder.schrempf@exceet.de>
+ *
+ * Based on the original fsl-quadspi.c spi-nor driver:
+ * Author: Freescale Semiconductor, Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/sizes.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/*
+ * The driver only uses one single LUT entry, that is updated on
+ * each call of exec_op(). Index 0 is preset at boot with a basic
+ * read operation, so let's use the last entry (15).
+ */
+#define	SEQID_LUT			15
+
+/* Registers used by the driver */
+#define QUADSPI_MCR			0x00
+#define QUADSPI_MCR_RESERVED_MASK	(0xF << 16)
+#define QUADSPI_MCR_MDIS_MASK		BIT(14)
+#define QUADSPI_MCR_CLR_TXF_MASK	BIT(11)
+#define QUADSPI_MCR_CLR_RXF_MASK	BIT(10)
+#define QUADSPI_MCR_DDR_EN_MASK		BIT(7)
+#define QUADSPI_MCR_END_CFG_MASK	(0x3 << 2)
+#define QUADSPI_MCR_SWRSTHD_MASK	BIT(1)
+#define QUADSPI_MCR_SWRSTSD_MASK	BIT(0)
+
+#define QUADSPI_IPCR			0x08
+#define QUADSPI_IPCR_SEQID_SHIFT	24
+
+#define QUADSPI_BUF3CR			0x1c
+#define QUADSPI_BUF3CR_ALLMST_MASK	BIT(31)
+#define QUADSPI_BUF3CR_ADATSZ_SHIFT	8
+#define QUADSPI_BUF3CR_ADATSZ_MASK	(0xFF << QUADSPI_BUF3CR_ADATSZ_SHIFT)
+
+#define QUADSPI_BFGENCR			0x20
+#define QUADSPI_BFGENCR_SEQID_SHIFT	12
+
+#define QUADSPI_BUF0IND			0x30
+#define QUADSPI_BUF1IND			0x34
+#define QUADSPI_BUF2IND			0x38
+#define QUADSPI_SFAR			0x100
+
+#define QUADSPI_SMPR			0x108
+#define QUADSPI_SMPR_DDRSMP_MASK	(7 << 16)
+#define QUADSPI_SMPR_FSDLY_MASK		BIT(6)
+#define QUADSPI_SMPR_FSPHS_MASK		BIT(5)
+#define QUADSPI_SMPR_HSENA_MASK		BIT(0)
+
+#define QUADSPI_RBCT			0x110
+#define QUADSPI_RBCT_WMRK_MASK		0x1F
+#define QUADSPI_RBCT_RXBRD_USEIPS	BIT(8)
+
+#define QUADSPI_TBDR			0x154
+
+#define QUADSPI_SR			0x15c
+#define QUADSPI_SR_IP_ACC_MASK		BIT(1)
+#define QUADSPI_SR_AHB_ACC_MASK		BIT(2)
+
+#define QUADSPI_FR			0x160
+#define QUADSPI_FR_TFF_MASK		BIT(0)
+
+#define QUADSPI_SPTRCLR			0x16c
+#define QUADSPI_SPTRCLR_IPPTRC		BIT(8)
+#define QUADSPI_SPTRCLR_BFPTRC		BIT(0)
+
+#define QUADSPI_SFA1AD			0x180
+#define QUADSPI_SFA2AD			0x184
+#define QUADSPI_SFB1AD			0x188
+#define QUADSPI_SFB2AD			0x18c
+#define QUADSPI_RBDR(x)			(0x200 + ((x) * 4))
+
+#define QUADSPI_LUTKEY			0x300
+#define QUADSPI_LUTKEY_VALUE		0x5AF05AF0
+
+#define QUADSPI_LCKCR			0x304
+#define QUADSPI_LCKER_LOCK		BIT(0)
+#define QUADSPI_LCKER_UNLOCK		BIT(1)
+
+#define QUADSPI_RSER			0x164
+#define QUADSPI_RSER_TFIE		BIT(0)
+
+#define QUADSPI_LUT_BASE		0x310
+#define QUADSPI_LUT_OFFSET		(SEQID_LUT * 4 * 4)
+#define QUADSPI_LUT_REG(idx)		(QUADSPI_LUT_BASE + \
+					QUADSPI_LUT_OFFSET + (idx) * 4)
+
+/* Instruction set for the LUT register */
+#define LUT_STOP		0
+#define LUT_CMD			1
+#define LUT_ADDR		2
+#define LUT_DUMMY		3
+#define LUT_MODE		4
+#define LUT_MODE2		5
+#define LUT_MODE4		6
+#define LUT_FSL_READ		7
+#define LUT_FSL_WRITE		8
+#define LUT_JMP_ON_CS		9
+#define LUT_ADDR_DDR		10
+#define LUT_MODE_DDR		11
+#define LUT_MODE2_DDR		12
+#define LUT_MODE4_DDR		13
+#define LUT_FSL_READ_DDR	14
+#define LUT_FSL_WRITE_DDR	15
+#define LUT_DATA_LEARN		16
+
+/*
+ * The PAD definitions for LUT register.
+ *
+ * The pad stands for the number of IO lines [0:3].
+ * For example, the quad read needs four IO lines,
+ * so you should use LUT_PAD(4).
+ */
+#define LUT_PAD(x) (fls(x) - 1)
+
+/*
+ * Macro for constructing the LUT entries with the following
+ * register layout:
+ *
+ *  ---------------------------------------------------
+ *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
+ *  ---------------------------------------------------
+ */
+#define LUT_DEF(idx, ins, pad, opr)					\
+	((((ins) << 10) | ((pad) << 8) | (opr)) << (((idx) % 2) * 16))
+
+/* Controller needs driver to swap endianness */
+#define QUADSPI_QUIRK_SWAP_ENDIAN	BIT(0)
+
+/* Controller needs 4x internal clock */
+#define QUADSPI_QUIRK_4X_INT_CLK	BIT(1)
+
+/*
+ * TKT253890, the controller needs the driver to fill the txfifo with
+ * 16 bytes at least to trigger a data transfer, even though the extra
+ * data won't be transferred.
+ */
+#define QUADSPI_QUIRK_TKT253890		BIT(2)
+
+/* TKT245618, the controller cannot wake up from wait mode */
+#define QUADSPI_QUIRK_TKT245618		BIT(3)
+
+enum fsl_qspi_devtype {
+	FSL_QUADSPI_VYBRID,
+	FSL_QUADSPI_IMX6SX,
+	FSL_QUADSPI_IMX7D,
+	FSL_QUADSPI_IMX6UL,
+	FSL_QUADSPI_LS1021A,
+	FSL_QUADSPI_LS2080A,
+};
+
+struct fsl_qspi_devtype_data {
+	enum fsl_qspi_devtype devtype;
+	unsigned int rxfifo;
+	unsigned int txfifo;
+	unsigned int ahb_buf_size;
+	unsigned int quirks;
+};
+
+static const struct fsl_qspi_devtype_data vybrid_data = {
+	.devtype = FSL_QUADSPI_VYBRID,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_SWAP_ENDIAN,
+};
+
+static const struct fsl_qspi_devtype_data imx6sx_data = {
+	.devtype = FSL_QUADSPI_IMX6SX,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_4X_INT_CLK | QUADSPI_QUIRK_TKT245618, };
+
+static const struct fsl_qspi_devtype_data imx7d_data = {
+	.devtype = FSL_QUADSPI_IMX7D,
+	.rxfifo = SZ_512,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK, };
+
+static const struct fsl_qspi_devtype_data imx6ul_data = {
+	.devtype = FSL_QUADSPI_IMX6UL,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK, };

Closing brace should be on next line for all above entries.

+
+static const struct fsl_qspi_devtype_data ls1021a_data = {
+	.devtype = FSL_QUADSPI_LS1021A,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = 0,
+};
+
+static const struct fsl_qspi_devtype_data ls2080a_data = {
+	.devtype = FSL_QUADSPI_LS2080A,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890,
+};
+
+struct fsl_qspi {
+	void __iomem *iobase;
+	void __iomem *ahb_addr;
+	u32 memmap_phy;
+	struct clk *clk, *clk_en;
+	struct device *dev;
+	struct completion c;
+	const struct fsl_qspi_devtype_data *devtype_data;
+	bool big_endian;
+	struct mutex lock;
+	struct pm_qos_request pm_qos_req;
+	int selected;
+};
+
+static inline int needs_swap_endian(struct fsl_qspi *q) {
+	return q->devtype_data->quirks & QUADSPI_QUIRK_SWAP_ENDIAN; }
+
+static inline int needs_4x_clock(struct fsl_qspi *q) {
+	return q->devtype_data->quirks & QUADSPI_QUIRK_4X_INT_CLK; }
+
+static inline int needs_fill_txfifo(struct fsl_qspi *q) {
+	return q->devtype_data->quirks & QUADSPI_QUIRK_TKT253890; }
+
+static inline int needs_wakeup_wait_mode(struct fsl_qspi *q) {
+	return q->devtype_data->quirks & QUADSPI_QUIRK_TKT245618; }
+
+/*
+ * An IC bug makes it necessary to rearrange the 32-bit data.
+ * Later chips, such as IMX6SLX, have fixed this bug.
+ */
+static inline u32 fsl_qspi_endian_xchg(struct fsl_qspi *q, u32 a) {
+	return needs_swap_endian(q) ? __swab32(a) : a; }
+
+/*
+ * R/W functions for big- or little-endian registers:
+ * The QSPI controller's endianness is independent of
+ * the CPU core's endianness. So far, although the CPU
+ * core is little-endian the QSPI controller can use
+ * big-endian or little-endian.
+ */
+static void qspi_writel(struct fsl_qspi *q, u32 val, void __iomem 
+*addr) {
+	if (q->big_endian)
+		iowrite32be(val, addr);
+	else
+		iowrite32(val, addr);
+}
+
+static u32 qspi_readl(struct fsl_qspi *q, void __iomem *addr) {
+	if (q->big_endian)
+		return ioread32be(addr);
+	else
+		return ioread32(addr);
+}
+
+static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id) {
+	struct fsl_qspi *q = dev_id;
+	u32 reg;
+
+	/* clear interrupt */
+	reg = qspi_readl(q, q->iobase + QUADSPI_FR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_FR);
+
+	if (reg & QUADSPI_FR_TFF_MASK)
+		complete(&q->c);
+
+	dev_dbg(q->dev, "QUADSPI_FR : 0x%.8x:0x%.8x\n", 0, reg);
+	return IRQ_HANDLED;
+}
+
+static int fsl_qspi_check_buswidth(struct fsl_qspi *q, u8 width) {
+	switch (width) {
+	case 1:
+	case 2:
+	case 4:
+		return 0;
+	}
+
+	return -ENOTSUPP;
+}
+
+static bool fsl_qspi_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	ret = fsl_qspi_check_buswidth(q, op->cmd.buswidth);
+
+	if (op->addr.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->addr.buswidth);
+
+	if (op->dummy.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->dummy.buswidth);
+
+	if (op->data.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->data.buswidth);
+
+	if (ret)
+		return false;
+
+	/*
+	 * The number of instructions needed for the op, needs
+	 * to fit into a single LUT entry.
+	 */
+	if (op->addr.nbytes +
+	   (op->dummy.nbytes ? 1:0) +
+	   (op->data.nbytes ? 1:0) > 6)
+		return false;
+
+	/* Max 64 dummy clock cycles supported */
+	if (op->dummy.nbytes * 8 / op->dummy.buswidth > 64)
+		return false;
+
+	/* Max data length, check controller limits and alignment */
+	if (op->data.dir == SPI_MEM_DATA_IN &&
+	    (op->data.nbytes > q->devtype_data->ahb_buf_size ||
+	     (op->data.nbytes > q->devtype_data->rxfifo - 4 &&
+	      !IS_ALIGNED(op->data.nbytes, 8))))
+		return false;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT &&
+	    op->data.nbytes > q->devtype_data->txfifo)
+		return false;
+
+	return true;
+}
+
+static void fsl_qspi_prepare_lut(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u32 lutval[4] = {};
+	int lutidx = 1, i;
+
+	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
+			     op->cmd.opcode);
+
+	/*
+	 * For some unknown reason, using LUT_ADDR doesn't work in some
+	 * cases (at least with only one byte long addresses), so
+	 * let's use LUT_MODE to write the address bytes one by one
+	 */
+	for (i = 0; i < op->addr.nbytes; i++) {
+		u8 addrbyte = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
+
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_MODE,
+					      LUT_PAD(op->addr.buswidth),
+					      addrbyte);
+		lutidx++;
+	}
+

To fill the address into the LUT we should use LUT_ADDR only; we need to find out the reason for the issue rather than use LUT_MODE here.
I have a few more comments on this, mentioned below.

+	if (op->dummy.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
+					      LUT_PAD(op->dummy.buswidth),
+					      op->dummy.nbytes * 8 /
+					      op->dummy.buswidth);
+		lutidx++;
+	}
+
+	if (op->data.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx,
+					      op->data.dir == SPI_MEM_DATA_IN ?
+					      LUT_FSL_READ : LUT_FSL_WRITE,
+					      LUT_PAD(op->data.buswidth),
+					      0);
+		lutidx++;
+	}
+
+	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
+
+	/* unlock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
+
+	/* fill LUT */
+	for (i = 0; i < ARRAY_SIZE(lutval); i++)
+		qspi_writel(q, lutval[i], base + QUADSPI_LUT_REG(i));
+
+	/* lock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR); }
+
+static int fsl_qspi_clk_prep_enable(struct fsl_qspi *q) {
+	int ret;
+
+	ret = clk_prepare_enable(q->clk_en);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(q->clk);
+	if (ret) {
+		clk_disable_unprepare(q->clk_en);
+		return ret;
+	}
+
+	if (needs_wakeup_wait_mode(q))
+		pm_qos_add_request(&q->pm_qos_req, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	return 0;
+}
+
+static void fsl_qspi_clk_disable_unprep(struct fsl_qspi *q) {
+	if (needs_wakeup_wait_mode(q))
+		pm_qos_remove_request(&q->pm_qos_req);
+
+	clk_disable_unprepare(q->clk);
+	clk_disable_unprepare(q->clk_en);
+}
+
+static void fsl_qspi_select_mem(struct fsl_qspi *q, struct spi_device 
+*spi) {
+	unsigned long rate = spi->max_speed_hz;
+	int ret, i;
+	u32 map_addr;
+
+	if (q->selected == spi->chip_select)
+		return;
+
+	/*
+	 * In HW there can be a maximum of four chips on two buses with
+	 * two chip selects on each bus. We use four chip selects in SW
+	 * to differentiate between the four chips.
+	 * We use the SFA1AD, SFA2AD, SFB1AD, SFB2AD registers to select
+	 * the chip we want to access.
+	 */
+	for (i = 0; i < 4; i++) {
+		if (i < spi->chip_select)
+			map_addr = q->memmap_phy;
+		else
+			map_addr = q->memmap_phy +
+				   2 * q->devtype_data->ahb_buf_size;
+
+		qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
+	}
+
+	if (needs_4x_clock(q))
+		rate *= 4;
+
+	fsl_qspi_clk_disable_unprep(q);
+
+	ret = clk_set_rate(q->clk, rate);
+	if (ret)
+		return;
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret)
+		return;
+
+	q->selected = spi->chip_select;
+}
+
+static void fsl_qspi_read_ahb(struct fsl_qspi *q, const struct 
+spi_mem_op *op) {
+	static int seq;
+
+	/*
+	 * We want to avoid needing to invalidate the cache by issueing
+	 * a reset to the AHB and Serial Flash domain, as this needs
+	 * time. So we change the address on each read to trigger an
+	 * actual read operation on the flash. The actual address for
+	 * the flash memory is set by programming the LUT.
+	 */
+	memcpy_fromio(op->data.buf.in,
+		      q->ahb_addr +
+		      (seq * q->devtype_data->ahb_buf_size),
+		      op->data.nbytes);
+
+	seq = seq ? 0 : 1;
+}
+
+static void fsl_qspi_fill_txfifo(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	int i;
+
+	for (i = 0; i < op->data.nbytes; i += 4) {
+		u32 val = 0;
+
+		memcpy(&val, op->data.buf.out + i,
+		       min_t(unsigned int, op->data.nbytes - i, 4));
+
+		val = fsl_qspi_endian_xchg(q, val);
+		qspi_writel(q, val, base + QUADSPI_TBDR);
+	}
+
+	if (needs_fill_txfifo(q)) {
+		for (; i < 16; i += 4)
+			qspi_writel(q, 0, base + QUADSPI_TBDR);
+	}
+}
+
+static void fsl_qspi_read_rxfifo(struct fsl_qspi *q,
+			  const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	int i;
+	u8 *buf = op->data.buf.in;
+
+	for (i = 0; i < op->data.nbytes; i += 4) {
+		u32 val = qspi_readl(q, base + QUADSPI_RBDR(i / 4));
+
+		val = fsl_qspi_endian_xchg(q, val);
+
+		memcpy(buf + i, &val,
+		       min_t(unsigned int, op->data.nbytes - i, 4));
+	}
+}
+
+static int fsl_qspi_do_op(struct fsl_qspi *q, const struct spi_mem_op 
+*op) {
+	void __iomem *base = q->iobase;
+	int err = 0;
+
+	init_completion(&q->c);
+
+	/*
+	 * Always start the sequence at the same index since we update
+	 * the LUT at each exec_op() call. And also specify the DATA
+	 * length, since it's has not been specified in the LUT.
+	 */
+	qspi_writel(q, op->data.nbytes |
+		    (SEQID_LUT << QUADSPI_IPCR_SEQID_SHIFT),
+		    base + QUADSPI_IPCR);
+
+	/* Wait for the interrupt. */
+	if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000)))
+		err = -ETIMEDOUT;
+
+	if (!err && op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
+		fsl_qspi_read_rxfifo(q, op);
+
+	return err;
+}
+
+static int fsl_qspi_exec_op(struct spi_mem *mem, const struct 
+spi_mem_op *op) {
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	void __iomem *base = q->iobase;
+	int err = 0;
+
+	mutex_lock(&q->lock);
+
+	/* wait for the controller being ready */
+	do {
+		u32 status;
+
+		status = qspi_readl(q, base + QUADSPI_SR);
+		if (status &
+		    (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)) {
+			udelay(1);
+			dev_dbg(q->dev, "The controller is busy, 0x%x\n",
+				status);
+			continue;
+		}
+		break;
+	} while (1);
+
+	fsl_qspi_select_mem(q, mem->spi);
+
+	qspi_writel(q, q->memmap_phy, base + QUADSPI_SFAR);

SFAR should hold the actual address on which we are performing the operation.

For example, if we are reading from flash-0 at offset 0x100000, then SFAR should hold the address 0x20100000.
The 'from'/'to' offset of a read/write request is stored in struct spi_mem_op (op->addr.val); it should be added to q->memmap_phy.

In the LUT preparation for ADDR, we should use an ADDR_WIDTH of 3-byte or 4-byte addressing only.
The start address should be saved in the SFAR register.

+
+	qspi_writel(q,
+		    qspi_readl(q, base + QUADSPI_MCR) |
+		    QUADSPI_MCR_CLR_RXF_MASK | QUADSPI_MCR_CLR_TXF_MASK,
+		    base + QUADSPI_MCR);
+
+	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC | QUADSPI_SPTRCLR_IPPTRC,
+		    base + QUADSPI_SPTRCLR);
+
+	fsl_qspi_prepare_lut(q, op);
+
+	/*
+	 * If we have large chunks of data, we read them through the AHB bus
+	 * by accessing the mapped memory. In all other cases we use
+	 * IP commands to access the flash.
+	 */
+	if (op->data.nbytes > (q->devtype_data->rxfifo - 4) &&
+	    op->data.dir == SPI_MEM_DATA_IN) {
+		fsl_qspi_read_ahb(q, op);
+	} else {
+		qspi_writel(q,
+			    QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
+			    base + QUADSPI_RBCT);
+
+		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+			fsl_qspi_fill_txfifo(q, op);
+
+		err = fsl_qspi_do_op(q, op);
+	}
+
+	mutex_unlock(&q->lock);
+
+	return err;
+}
+
+static int fsl_qspi_adjust_op_size(struct spi_mem *mem, struct 
+spi_mem_op *op) {
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		if (op->data.nbytes > q->devtype_data->txfifo)
+			op->data.nbytes = q->devtype_data->txfifo;
+	} else {
+		if (op->data.nbytes > q->devtype_data->ahb_buf_size)
+			op->data.nbytes = q->devtype_data->ahb_buf_size;
+		else if (op->data.nbytes > (q->devtype_data->rxfifo - 4))
+			op->data.nbytes = ALIGN_DOWN(op->data.nbytes, 8);
+	}
+
+	return 0;
+}
+
+static int fsl_qspi_default_setup(struct fsl_qspi *q) {
+	void __iomem *base = q->iobase;
+	u32 reg;
+	int ret;
+
+	/* disable and unprepare clock to avoid glitch pass to controller */
+	fsl_qspi_clk_disable_unprep(q);
+
+	/* the default frequency, we will change it later if necessary. */
+	ret = clk_set_rate(q->clk, 66000000);
+	if (ret)
+		return ret;
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret)
+		return ret;
+
+	/* Reset the module */
+	qspi_writel(q, QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
+		base + QUADSPI_MCR);
+	udelay(1);
+
+	/* Disable the module */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
+			base + QUADSPI_MCR);
+
+	reg = qspi_readl(q, base + QUADSPI_SMPR);
+	qspi_writel(q, reg & ~(QUADSPI_SMPR_FSDLY_MASK
+			| QUADSPI_SMPR_FSPHS_MASK
+			| QUADSPI_SMPR_HSENA_MASK
+			| QUADSPI_SMPR_DDRSMP_MASK), base + QUADSPI_SMPR);
+
+	/* We only use the buffer3 for AHB read */
+	qspi_writel(q, 0, base + QUADSPI_BUF0IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
+
+	qspi_writel(q, SEQID_LUT << QUADSPI_BFGENCR_SEQID_SHIFT,
+		    q->iobase + QUADSPI_BFGENCR);
+	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, base + QUADSPI_RBCT);
+	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
+		    ((q->devtype_data->ahb_buf_size / 8) <<
+		    QUADSPI_BUF3CR_ADATSZ_SHIFT),
+		    base + QUADSPI_BUF3CR);
+
+	q->selected = -1;
+
+	/* Enable the module */
+	qspi_writel(q, QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
+			base + QUADSPI_MCR);
+
+	/* clear all interrupt status */
+	qspi_writel(q, 0xffffffff, q->iobase + QUADSPI_FR);
+
+	/* enable the interrupt */
+	qspi_writel(q, QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
+
+	return 0;
+}
+
+static const char *fsl_qspi_get_name(struct spi_mem *mem) {
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct device *dev = &mem->spi->dev;
+	const char *name;
+
+	/*
+	 * In order to keep mtdparts compatible with the old MTD driver at
+	 * mtd/spi-nor/fsl-quadspi.c, we set a custom name derived from the
+	 * platform_device of the controller.
+	 */
+	if (of_get_available_child_count(q->dev->of_node) == 1)
+		name = dev_name(q->dev);
+	else
+		name = devm_kasprintf(dev, GFP_KERNEL,
+				      "%s-%d", dev_name(q->dev),
+				      mem->spi->chip_select);
+
+	if (!name) {
+		dev_err(dev, "failed to get memory for custom flash name\n");
+		return dev_name(q->dev);
+	}
+
+	return name;
+}
+
+static const struct spi_controller_mem_ops fsl_qspi_mem_ops = {
+	.adjust_op_size = fsl_qspi_adjust_op_size,
+	.supports_op = fsl_qspi_supports_op,
+	.exec_op = fsl_qspi_exec_op,
+	.get_name = fsl_qspi_get_name,
+};
+
+/*
+ * Bind the driver to a QuadSPI platform device.
+ *
+ * Allocates the SPI controller (with the driver state as its devdata), maps
+ * the "QuadSPI" register window and the "QuadSPI-memory" AHB window, acquires
+ * and enables the "qspi_en"/"qspi" clocks, requests the interrupt, applies the
+ * default hardware setup and registers the controller.
+ *
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * here that is not device-managed is released again.
+ */
+static int fsl_qspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	struct fsl_qspi *q;
+	int ret;
+
+	ctlr = spi_alloc_master(dev, sizeof(*q));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD |
+			  SPI_TX_DUAL | SPI_TX_QUAD;
+
+	q = spi_controller_get_devdata(ctlr);
+	q->dev = dev;
+	q->devtype_data = of_device_get_match_data(dev);
+	if (!q->devtype_data) {
+		ret = -ENODEV;
+		goto err_put_ctrl;
+	}
+
+	platform_set_drvdata(pdev, q);
+
+	/* find the resources */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "QuadSPI");
+	q->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->iobase)) {
+		ret = PTR_ERR(q->iobase);
+		goto err_put_ctrl;
+	}
+
+	q->big_endian = of_property_read_bool(np, "big-endian");
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "QuadSPI-memory");
+	q->ahb_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->ahb_addr)) {
+		ret = PTR_ERR(q->ahb_addr);
+		goto err_put_ctrl;
+	}
+
+	/* res is known to be valid here: the mapping above succeeded */
+	q->memmap_phy = res->start;
+
+	/* find the clocks */
+	q->clk_en = devm_clk_get(dev, "qspi_en");
+	if (IS_ERR(q->clk_en)) {
+		ret = PTR_ERR(q->clk_en);
+		goto err_put_ctrl;
+	}
+
+	q->clk = devm_clk_get(dev, "qspi");
+	if (IS_ERR(q->clk)) {
+		ret = PTR_ERR(q->clk);
+		goto err_put_ctrl;
+	}
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		goto err_put_ctrl;
+	}
+
+	/* find the irq */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "failed to get the irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	ret = devm_request_irq(dev, ret,
+			       fsl_qspi_irq_handler, 0, pdev->name, q);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	mutex_init(&q->lock);
+
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = 4;
+	ctlr->mem_ops = &fsl_qspi_mem_ops;
+
+	fsl_qspi_default_setup(q);
+
+	ctlr->dev.of_node = np;
+
+	/*
+	 * Use the device-managed variant so the controller is unregistered
+	 * when the driver is unbound; the remove callback only has access to
+	 * the driver state, not to the controller, and never unregistered it.
+	 *
+	 * NOTE(review): devres releases run after the remove callback has
+	 * already disabled the hardware - confirm that no child device
+	 * teardown needs bus access at that point.
+	 */
+	ret = devm_spi_register_controller(dev, ctlr);
+	if (ret)
+		goto err_destroy_mutex;
+
+	return 0;
+
+err_destroy_mutex:
+	mutex_destroy(&q->lock);
+
+err_disable_clk:
+	fsl_qspi_clk_disable_unprep(q);
+
+err_put_ctrl:
+	spi_controller_put(ctlr);
+
+	dev_err(dev, "Freescale QuadSPI probe failed\n");
+	return ret;
+}
+
+/*
+ * Unbind the driver: disable the hardware, clear the interrupt enable
+ * register, then release the clocks and the mutex. Always returns 0.
+ */
+static int fsl_qspi_remove(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+
+	/* disable the hardware */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER);
+
+	fsl_qspi_clk_disable_unprep(q);
+
+	mutex_destroy(&q->lock);
+
+	/*
+	 * q->ahb_addr was mapped with devm_ioremap_resource() and is unmapped
+	 * by devres; calling iounmap() here would unmap it twice.
+	 */
+
+	return 0;
+}
+
+/* Platform-bus suspend hook: nothing is done here, it always returns 0. */
+static int fsl_qspi_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	return 0;
+}
+
+/*
+ * Platform-bus resume hook: re-apply the default controller setup.
+ *
+ * Returns the result of fsl_qspi_default_setup() so that a failed
+ * re-initialisation is reported to the PM core instead of being dropped.
+ */
+static int fsl_qspi_resume(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+
+	return fsl_qspi_default_setup(q);
+}
+
+/* Device-tree compatibles handled by this driver and their per-SoC data. */
+static const struct of_device_id fsl_qspi_dt_ids[] = {
+	{ .compatible = "fsl,vf610-qspi",   .data = &vybrid_data },
+	{ .compatible = "fsl,imx6sx-qspi",  .data = &imx6sx_data },
+	{ .compatible = "fsl,imx7d-qspi",   .data = &imx7d_data },
+	{ .compatible = "fsl,imx6ul-qspi",  .data = &imx6ul_data },
+	{ .compatible = "fsl,ls1021a-qspi", .data = &ls1021a_data },
+	{ .compatible = "fsl,ls2080a-qspi", .data = &ls2080a_data },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_qspi_dt_ids);
+
+/* Platform driver glue: OF matching plus probe/remove and suspend/resume. */
+static struct platform_driver fsl_qspi_driver = {
+	.probe		= fsl_qspi_probe,
+	.remove		= fsl_qspi_remove,
+	.suspend	= fsl_qspi_suspend,
+	.resume		= fsl_qspi_resume,
+	.driver		= {
+		.name		= "fsl-quadspi",
+		.of_match_table	= fsl_qspi_dt_ids,
+	},
+};
+module_platform_driver(fsl_qspi_driver);
+
+/* Module metadata: one macro per line, string literals kept on one line. */
+MODULE_DESCRIPTION("Freescale QuadSPI Controller Driver");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@bootlin.com>");
+MODULE_AUTHOR("Frieder Schrempf <frieder.schrempf@exceet.de>");
+MODULE_LICENSE("GPL v2");
--
2.7.4

Also, we should add more debug messages using dev_dbg(), for example in functions such as fsl_qspi_prepare_lut().
Boris Brezillon June 8, 2018, 12:51 p.m. | #8
Hi Yogesh,

On Fri, 8 Jun 2018 11:54:12 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Frieder,
> 
> I have tried to validate your patch on fsl,ls2080a target having 2 Spansion NOR flash, S25FS512S, as slave device.
> Below are my observations:
> 
> Observation 1:
> In Linux boot logs after driver probing is successful, getting below log messages
> [    1.435986] m25p80 spi0.0: found s25fl512s, expected m25p80
> [    1.441564] m25p80 spi0.0: s25fl512s (65536 Kbytes)
> [    1.446972] m25p80 spi0.1: found s25fl512s, expected m25p80
> [    1.452548] m25p80 spi0.1: s25fl512s (65536 Kbytes)
> 
> IMHO, we need to correct message as 'found s25fl512s, expected m25p80' as final underlying connected flash device is s25fl512s.

Not sure what you mean here. What would you like us to fix exactly?

> 
> Observation 2:
> I have observed data sanity issue after performing read/write operations using MTD interface. Explained below
> 
> root:~# mtd_debug erase /dev/mtd0 0x1000000 0x40000
> Erased 262144 bytes from address 0x01000000 in flash                      --> Erase at address 0x1000000 of erase size 0x40000
> root:~# mtd_debug read /dev/mtd0 0x0 0x100 rp
> Copied 256 bytes from address 0x00000000 in flash to rp                   --> Read 0x100 bytes from flash from address 0x0 in file rp
> root:~# mtd_debug write /dev/mtd0 0x1000000 0x100 rp
> Copied 256 bytes from rp to address 0x01000000 in flash                   --> Write 0x100 bytes to flash address 0x1000000 from file rp
> root:~# mtd_debug read /dev/mtd0 0x1000000 0x100 wp
> Copied 256 bytes from address 0x01000000 in flash to wp                  --> Read 0x100 bytes from flash from address 0x1000000 in file wp
> root:~# diff rp wp                                                                                           --> compare both rp and wp files, if they are different output comes on console stating file are different
> Files rp and wp differ
> root:~# hexdump wp
> 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> 0000040 ffff ffff ffff ffff ffff ffff ffff ffff
> *
> 0000100
> root:~# hexdump rp
> 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> 0000040 2403 0000 0000 0000 0000 0000 0000 0000
> 0000050 0000 0000 0000 0000 0000 0000 0000 0000
> *
> 0000070 0011 0000 09e7 0000 0000 4411 9555 0050
> 0000080 0000 0000 0000 0000 f9bc afa1 0404 31e0
> 0000090 0000 0000 0400 31e0 0000 2010 08dc 31eb
> 00000a0 2880 0050 1300 31eb 4e20 8010 0000 80ff
> 00000b0 0000 0000 beef dead beef dead beef dead
> 00000c0 beef dead beef dead beef dead beef dead
> *
> 0000100
> root:~#
> 
> In hexdump output of the file which being read from address 0x1000000,wp, it can be observed that only first 64 bytes (0x40) are written on the flash.
> 
> Observation 3:
> As we can support JFFS2 filesystem on NOR flash, so we can expect JFFS2 commands should work fine on NOR flash.
> But with this driver change my mount command is not working.
> 
> In my target there are 2 flash slave devices connected, and I have given argument to create MTD partition like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) " for 2nd flash.
> Below is output for /proc/mtd commands
>     root@ls1012ardb:~# cat /proc/mtd
>     dev:    size   erasesize  name
>     mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
>     mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
>     mtd2: 00a00000 00040000 "test"
>     mtd3: 02e00000 00040000 "rootfs"
> 
>     root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
>     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
>     Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
>     Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
>     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
> 
> This command didn't finish successfully and there are lot of messages coming on console mentioning failure in jffs2_scan_eraseblock()
>     [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0000: 0x2886 instead
>     [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
>     [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0008: 0xb10f instead
> 
> If I remove this patch series and check with older implementation, JFFS2 mounting is working fine.

Problems 2 and 3 should definitely be fixed. That's weird because I
remember that Frieder tested the new driver with a NOR chip, maybe not
with JFFS2 though.

> 
> Observation 4:
> With previous driver, we can read content of flash directly using devmem command
> Like devmem 0x20000000  "Flash is connected at this Quad-SPI address"
> 
> But with new driver devmem interface reporting in-correct value.

This one is clearly not something we should fix. What you were doing is
unsafe (accessing the direct mapping from userspace without making sure
you're the only one to access the device), and making it even more
broken is IMO a better thing. You want to access the memory from
user-space, just use the standard MTD interface (/dev/mtdX).

[...]

> +
> +static void fsl_qspi_prepare_lut(struct fsl_qspi *q,
> +				 const struct spi_mem_op *op)
> +{
> +	void __iomem *base = q->iobase;
> +	u32 lutval[4] = {};
> +	int lutidx = 1, i;
> +
> +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
> +			     op->cmd.opcode);
> +
> +	/*
> +	 * For some unknown reason, using LUT_ADDR doesn't work in some
> +	 * cases (at least with only one byte long addresses), so
> +	 * let's use LUT_MODE to write the address bytes one by one
> +	 */
> +	for (i = 0; i < op->addr.nbytes; i++) {
> +		u8 addrbyte = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
> +
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_MODE,
> +					      LUT_PAD(op->addr.buswidth),
> +					      addrbyte);
> +		lutidx++;
> +	}
> +
> 
> For ADDR filling in LUT we should use LUT_ADDR only, needs to find out the reason for the issue and we shouldn't use LUT_MODE here.

Just try with a 16-bit address and you'll see it does not work. I don't
know why, and it's more something you should ask someone working at
NXP ;-).

> I have few more comments regarding same, mentioned below.
> 
> +	if (op->dummy.nbytes) {
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
> +					      LUT_PAD(op->dummy.buswidth),
> +					      op->dummy.nbytes * 8 /
> +					      op->dummy.buswidth);
> +		lutidx++;
> +	}
> +
> +	if (op->data.nbytes) {
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx,
> +					      op->data.dir == SPI_MEM_DATA_IN ?
> +					      LUT_FSL_READ : LUT_FSL_WRITE,
> +					      LUT_PAD(op->data.buswidth),
> +					      0);
> +		lutidx++;
> +	}
> +
> +	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
> +
> +	/* unlock LUT */
> +	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
> +	qspi_writel(q, QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
> +
> +	/* fill LUT */
> +	for (i = 0; i < ARRAY_SIZE(lutval); i++)
> +		qspi_writel(q, lutval[i], base + QUADSPI_LUT_REG(i));
> +
> +	/* lock LUT */
> +	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
> +	qspi_writel(q, QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR); }
> +

[...]

> +static int fsl_qspi_exec_op(struct spi_mem *mem, const struct 
> +spi_mem_op *op) {
> +	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
> +	void __iomem *base = q->iobase;
> +	int err = 0;
> +
> +	mutex_lock(&q->lock);
> +
> +	/* wait for the controller being ready */
> +	do {
> +		u32 status;
> +
> +		status = qspi_readl(q, base + QUADSPI_SR);
> +		if (status &
> +		    (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)) {
> +			udelay(1);
> +			dev_dbg(q->dev, "The controller is busy, 0x%x\n",
> +				status);
> +			continue;
> +		}
> +		break;
> +	} while (1);
> +
> +	fsl_qspi_select_mem(q, mem->spi);
> +
> +	qspi_writel(q, q->memmap_phy, base + QUADSPI_SFAR);
> 
> SFAR should have the actual address where we are doing operation.

Not with the new approach. SFAR is now automatically reconfigured at
each access, and it works because we're not using a LUT_ADDR
instruction but a LUT_MODE one. Sure, I'd prefer to go for the clean
solution with a LUT_ADDR and the address passed through SFAR (+AHB
offset), but it does not work with anything that is not 24 bits or
32 bits wide, which means it does not work when you need to access a SPI
NAND device (on which some addresses are 16 bits wide).

> 
> For e.g. If reading from flash-0 offset 0x100000 than SFAR should have address as 0x20100000.
> As for 'read/write' request 'from/to' respectively been saved in struct spi_mem_op [op.val] this should be added to q->memmap_phy.

You're still thinking as if the driver was only controlling a NOR
device which can be directly addressed. This is not the case for NAND
devices where you first have to load the data in the NAND internal cache
and then read data from the cache.

> 
> In LUT preparation for ADDR, we should use ADDR_WIDTH as 3-byte or 4-byte addressing only.

Please have a look at SPI NAND datasheets and you'll see it's simply
not possible. So, either NXP doesn't want its QSPI controller to
interface with anything except NORs or we have to use the trick we have
here (LUT_MODE instead of LUT_ADDR).

> Start address should be saved in SFAR register.
> 
> +
> +	qspi_writel(q,
> +		    qspi_readl(q, base + QUADSPI_MCR) |
> +		    QUADSPI_MCR_CLR_RXF_MASK | QUADSPI_MCR_CLR_TXF_MASK,
> +		    base + QUADSPI_MCR);
> +
> +	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC | QUADSPI_SPTRCLR_IPPTRC,
> +		    base + QUADSPI_SPTRCLR);
> +
> +	fsl_qspi_prepare_lut(q, op);
> +
> +	/*
> +	 * If we have large chunks of data, we read them through the AHB bus
> +	 * by accessing the mapped memory. In all other cases we use
> +	 * IP commands to access the flash.
> +	 */
> +	if (op->data.nbytes > (q->devtype_data->rxfifo - 4) &&
> +	    op->data.dir == SPI_MEM_DATA_IN) {
> +		fsl_qspi_read_ahb(q, op);
> +	} else {
> +		qspi_writel(q,
> +			    QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
> +			    base + QUADSPI_RBCT);
> +
> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> +			fsl_qspi_fill_txfifo(q, op);
> +
> +		err = fsl_qspi_do_op(q, op);
> +	}
> +
> +	mutex_unlock(&q->lock);
> +
> +	return err;
> +}

[...]

> 
> Also we should add more debug print messages under dev_dbg() like in func like fsl_qspi_prepare_lut() etc.
> 

Would you mind giving more details about where you'd like these traces
to be placed exactly and what information you'd like to display?

Thanks,

Boris
Andy Shevchenko June 8, 2018, 8:27 p.m. | #9
On Fri, Jun 8, 2018 at 2:54 PM, Yogesh Narayan Gaur
<yogeshnarayan.gaur@nxp.com> wrote:

Hi Frieder,

> +#define QUADSPI_MCR_RESERVED_MASK      (0xF << 16)

GENMASK()

> +#define QUADSPI_MCR_END_CFG_MASK       (0x3 << 2)

> +#define QUADSPI_BUF3CR_ADATSZ_MASK     (0xFF << QUADSPI_BUF3CR_ADATSZ_SHIFT)

> +#define QUADSPI_SMPR_DDRSMP_MASK       (7 << 16)

> +#define QUADSPI_RBCT_WMRK_MASK         0x1F

Ditto.

> +#define QUADSPI_LUT_OFFSET             (SEQID_LUT * 4 * 4)
> +#define QUADSPI_LUT_REG(idx)           (QUADSPI_LUT_BASE + \
> +                                       QUADSPI_LUT_OFFSET + (idx) * 4)

It looks slightly better when

#define FOO \
 (BAR1 + BAR2 ...)

> +/*
> + * An IC bug makes it necessary to rearrange the 32-bit data.
> + * Later chips, such as IMX6SLX, have fixed this bug.
> + */
> +static inline u32 fsl_qspi_endian_xchg(struct fsl_qspi *q, u32 a) {
> +       return needs_swap_endian(q) ? __swab32(a) : a; }

func()
{
...
}

Fix this everywhere.



> +static void qspi_writel(struct fsl_qspi *q, u32 val, void __iomem
> +*addr) {
> +       if (q->big_endian)
> +               iowrite32be(val, addr);
> +       else
> +               iowrite32(val, addr);
> +}
> +
> +static u32 qspi_readl(struct fsl_qspi *q, void __iomem *addr) {
> +       if (q->big_endian)
> +               return ioread32be(addr);
> +       else
> +               return ioread32(addr);
> +}

Better to define ->read() and ->write() callbacks and call them unconditionally.

> +static int fsl_qspi_check_buswidth(struct fsl_qspi *q, u8 width) {

> +       switch (width) {
> +       case 1:
> +       case 2:
> +       case 4:
> +               return 0;
> +       }


if (!is_power_of_2(width) || width >= 8)
 return -E...;

return 0;

?

> +
> +       return -ENOTSUPP;
> +}

> +static int fsl_qspi_clk_prep_enable(struct fsl_qspi *q) {
> +       int ret;
> +
> +       ret = clk_prepare_enable(q->clk_en);
> +       if (ret)
> +               return ret;
> +
> +       ret = clk_prepare_enable(q->clk);
> +       if (ret) {

> +               clk_disable_unprepare(q->clk_en);

Is it needed here?

> +               return ret;
> +       }
> +
> +       if (needs_wakeup_wait_mode(q))
> +               pm_qos_add_request(&q->pm_qos_req, PM_QOS_CPU_DMA_LATENCY, 0);
> +
> +       return 0;
> +}

> +               qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));

Redundant parens.



> +       seq = seq ? 0 : 1;

seq = (seq + 1) % 2;

?

> +}

> +       for (i = 0; i < op->data.nbytes; i += 4) {
> +               u32 val = 0;
> +
> +               memcpy(&val, op->data.buf.out + i,

> +                      min_t(unsigned int, op->data.nbytes - i, 4));

You may easily avoid this conditional on each iteration.

> +
> +               val = fsl_qspi_endian_xchg(q, val);
> +               qspi_writel(q, val, base + QUADSPI_TBDR);
> +       }

> +       /* wait for the controller being ready */

FOREVER! See below.

> +       do {
> +               u32 status;
> +
> +               status = qspi_readl(q, base + QUADSPI_SR);
> +               if (status &
> +                   (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)) {
> +                       udelay(1);
> +                       dev_dbg(q->dev, "The controller is busy, 0x%x\n",
> +                               status);
> +                       continue;
> +               }
> +               break;
> +       } while (1);

Please, avoid infinite loops.

unsigned int count = 100;
...
do {
...
} while (--count);

> +       if (of_get_available_child_count(q->dev->of_node) == 1)
> +               name = dev_name(q->dev);
> +       else
> +               name = devm_kasprintf(dev, GFP_KERNEL,
> +                                     "%s-%d", dev_name(q->dev),
> +                                     mem->spi->chip_select);
> +
> +       if (!name) {
> +               dev_err(dev, "failed to get memory for custom flash name\n");

> +               return dev_name(q->dev);

Might it be racy if in between device gets a name assigned?

> +       }

> +       if (q->ahb_addr)
> +               iounmap(q->ahb_addr);

Double unmap?

> +static struct platform_driver fsl_qspi_driver = {
> +       .driver = {
> +               .name   = "fsl-quadspi",
> +               .of_match_table = fsl_qspi_dt_ids,
> +       },
> +       .probe          = fsl_qspi_probe,
> +       .remove         = fsl_qspi_remove,

> +       .suspend        = fsl_qspi_suspend,
> +       .resume         = fsl_qspi_resume,

Why not in struct dev_pm_ops?

> +};


> +MODULE_AUTHOR("Freescale Semiconductor Inc."); MODULE_AUTHOR("Boris
> +Brezillion <boris.brezillon@bootlin.com>"); MODULE_AUTHOR("Frieder
> +Schrempf <frieder.schrempf@exceet.de>"); MODULE_LICENSE("GPL v2");

Wrong indentation.
Yogesh Narayan Gaur June 11, 2018, 6:31 a.m. | #10
Hi Boris,


-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Friday, June 8, 2018 6:22 PM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Cc: Frieder Schrempf <frieder.schrempf@exceet.de>; linux-mtd@lists.infradead.org; linux-spi@vger.kernel.org; dwmw2@infradead.org; computersforpeace@gmail.com; marek.vasut@gmail.com; richard@nod.at; miquel.raynal@bootlin.com; broonie@kernel.org; David Wolfe <david.wolfe@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

Hi Yogesh,

On Fri, 8 Jun 2018 11:54:12 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Frieder,
> 
> I have tried to validate your patch on fsl,ls2080a target having 2 Spansion NOR flash, S25FS512S, as slave device.
> Below are my observations:
> 
> Observation 1:
> In Linux boot logs after driver probing is successful, getting below log messages
> [    1.435986] m25p80 spi0.0: found s25fl512s, expected m25p80
> [    1.441564] m25p80 spi0.0: s25fl512s (65536 Kbytes)
> [    1.446972] m25p80 spi0.1: found s25fl512s, expected m25p80
> [    1.452548] m25p80 spi0.1: s25fl512s (65536 Kbytes)
> 
> IMHO, we need to correct message as 'found s25fl512s, expected m25p80' as final underlying connected flash device is s25fl512s.

Not sure what you mean here. What would you like us to fix exactly?

> 
> Observation 2:
> I have observed data sanity issue after performing read/write 
> operations using MTD interface. Explained below
> 
> root:~# mtd_debug erase /dev/mtd0 0x1000000 0x40000
> Erased 262144 bytes from address 0x01000000 in flash                      --> Erase at address 0x1000000 of erase size 0x40000
> root:~# mtd_debug read /dev/mtd0 0x0 0x100 rp
> Copied 256 bytes from address 0x00000000 in flash to rp                   --> Read 0x100 bytes from flash from address 0x0 in file rp
> root:~# mtd_debug write /dev/mtd0 0x1000000 0x100 rp
> Copied 256 bytes from rp to address 0x01000000 in flash                   --> Write 0x100 bytes to flash address 0x1000000 from file rp
> root:~# mtd_debug read /dev/mtd0 0x1000000 0x100 wp
> Copied 256 bytes from address 0x01000000 in flash to wp                  --> Read 0x100 bytes from flash from address 0x1000000 in file wp
> root:~# diff rp wp                                                                                           --> compare both rp and wp files, if they are different output comes on console stating file are different
> Files rp and wp differ
> root:~# hexdump wp
> 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> 0000040 ffff ffff ffff ffff ffff ffff ffff ffff
> *
> 0000100
> root:~# hexdump rp
> 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> 0000040 2403 0000 0000 0000 0000 0000 0000 0000
> 0000050 0000 0000 0000 0000 0000 0000 0000 0000
> *
> 0000070 0011 0000 09e7 0000 0000 4411 9555 0050
> 0000080 0000 0000 0000 0000 f9bc afa1 0404 31e0
> 0000090 0000 0000 0400 31e0 0000 2010 08dc 31eb
> 00000a0 2880 0050 1300 31eb 4e20 8010 0000 80ff
> 00000b0 0000 0000 beef dead beef dead beef dead
> 00000c0 beef dead beef dead beef dead beef dead
> *
> 0000100
> root:~#
> 
> In hexdump output of the file which being read from address 0x1000000,wp, it can be observed that only first 64 bytes (0x40) are written on the flash.
> 
> Observation 3:
> As we can support JFFS2 filesystem on NOR flash, so we can expect JFFS2 commands should work fine on NOR flash.
> But with this driver change my mount command is not working.
> 
> In my target there are 2 flash slave devices connected, and I have given argument to create MTD partition like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) " for 2nd flash.
> Below is output for /proc/mtd commands
>     root@ls1012ardb:~# cat /proc/mtd
>     dev:    size   erasesize  name
>     mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
>     mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
>     mtd2: 00a00000 00040000 "test"
>     mtd3: 02e00000 00040000 "rootfs"
> 
>     root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
>     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
>     Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
>     Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
>     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
> 
> This command didn't finish successfully and there are lot of messages coming on console mentioning failure in jffs2_scan_eraseblock()
>     [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0000: 0x2886 instead
>     [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
>     [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask 
> 0x1985 not found at 0x013c0008: 0xb10f instead
> 
> If I remove this patch series and check with older implementation, JFFS2 mounting is working fine.

Problems 2 and 3 should definitely be fixed. That's weird because I remember that Frieder tested the new driver with a NOR chip, maybe not with JFFS2 though.

The write issue is probably caused by the changes pushed to the spi-mem framework.

I have added my comment in that patch[1].

[1] https://patchwork.ozlabs.org/patch/869629/

> 
> Observation 4:
> With previous driver, we can read content of flash directly using 
> devmem command Like devmem 0x20000000  "Flash is connected at this Quad-SPI address"
> 
> But with new driver devmem interface reporting in-correct value.

This one is clearly not something we should fix. What you were doing is unsafe (accessing the direct mapping from userspace without making sure you're the only one to access the device), and making it even more broken is IMO a better thing. You want to access the memory from user-space, just use the standard MTD interface (/dev/mtdX).

Let me check how devmem interface is working.

--
Regards
Yogesh Gaur

[...]

> +
> +static void fsl_qspi_prepare_lut(struct fsl_qspi *q,
> +				 const struct spi_mem_op *op)
> +{
> +	void __iomem *base = q->iobase;
> +	u32 lutval[4] = {};
> +	int lutidx = 1, i;
> +
> +	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
> +			     op->cmd.opcode);
> +
> +	/*
> +	 * For some unknown reason, using LUT_ADDR doesn't work in some
> +	 * cases (at least with only one byte long addresses), so
> +	 * let's use LUT_MODE to write the address bytes one by one
> +	 */
> +	for (i = 0; i < op->addr.nbytes; i++) {
> +		u8 addrbyte = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
> +
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_MODE,
> +					      LUT_PAD(op->addr.buswidth),
> +					      addrbyte);
> +		lutidx++;
> +	}
> +
> 
> For ADDR filling in LUT we should use LUT_ADDR only, needs to find out the reason for the issue and we shouldn't use LUT_MODE here.

Just try with a 16-bit address and you'll see it does not work. I don't know why, and it's more something you should ask to someone working at NXP ;-).

> I have few more comments regarding same, mentioned below.
> 
> +	if (op->dummy.nbytes) {
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
> +					      LUT_PAD(op->dummy.buswidth),
> +					      op->dummy.nbytes * 8 /
> +					      op->dummy.buswidth);
> +		lutidx++;
> +	}
> +
> +	if (op->data.nbytes) {
> +		lutval[lutidx / 2] |= LUT_DEF(lutidx,
> +					      op->data.dir == SPI_MEM_DATA_IN ?
> +					      LUT_FSL_READ : LUT_FSL_WRITE,
> +					      LUT_PAD(op->data.buswidth),
> +					      0);
> +		lutidx++;
> +	}
> +
> +	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
> +
> +	/* unlock LUT */
> +	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
> +	qspi_writel(q, QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
> +
> +	/* fill LUT */
> +	for (i = 0; i < ARRAY_SIZE(lutval); i++)
> +		qspi_writel(q, lutval[i], base + QUADSPI_LUT_REG(i));
> +
> +	/* lock LUT */
> +	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
> +	qspi_writel(q, QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR); }
> +

[...]

> +static int fsl_qspi_exec_op(struct spi_mem *mem, const struct 
> +spi_mem_op *op) {
> +	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
> +	void __iomem *base = q->iobase;
> +	int err = 0;
> +
> +	mutex_lock(&q->lock);
> +
> +	/* wait for the controller being ready */
> +	do {
> +		u32 status;
> +
> +		status = qspi_readl(q, base + QUADSPI_SR);
> +		if (status &
> +		    (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)) {
> +			udelay(1);
> +			dev_dbg(q->dev, "The controller is busy, 0x%x\n",
> +				status);
> +			continue;
> +		}
> +		break;
> +	} while (1);
> +
> +	fsl_qspi_select_mem(q, mem->spi);
> +
> +	qspi_writel(q, q->memmap_phy, base + QUADSPI_SFAR);
> 
> SFAR should have the actual address where we are doing operation.

Not with the new approach. SFAR is now automatically reconfigured at each access, and it works because we're not using a LUT_ADDR instruction but a LUT_MODE one. Sure, I'd prefer to go for the clean solution with a LUT_ADDR and the address passed through SFAR (+AHB offset), but it does not work with anything that is not 24 bits or
32 bits wide, which means it does not work when you need to access a SPI NAND device (on which some addresses are 16 bits wide).

> 
> For e.g. If reading from flash-0 offset 0x100000 than SFAR should have address as 0x20100000.
> As for 'read/write' request 'from/to' respectively been saved in struct spi_mem_op [op.val] this should be added to q->memmap_phy.

You're still thinking as if the driver was only controlling a NOR device which can be directly addressed. This is not the case for NAND devices where you first have to load the data in the NAND internal cache and then read data from the cache.

> 
> In LUT preparation for ADDR, we should use ADDR_WIDTH as 3-byte or 4-byte addressing only.

Please have a look at SPI NAND datasheets and you'll see it's simply not possible. So, either NXP doesn't want his QSPI controller to interface with anything except NORs or we have to use the trick we have here (LUT_MODE instead of LUT_ADDR).

> Start address should be saved in SFAR register.
> 
> +
> +	qspi_writel(q,
> +		    qspi_readl(q, base + QUADSPI_MCR) |
> +		    QUADSPI_MCR_CLR_RXF_MASK | QUADSPI_MCR_CLR_TXF_MASK,
> +		    base + QUADSPI_MCR);
> +
> +	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC | QUADSPI_SPTRCLR_IPPTRC,
> +		    base + QUADSPI_SPTRCLR);
> +
> +	fsl_qspi_prepare_lut(q, op);
> +
> +	/*
> +	 * If we have large chunks of data, we read them through the AHB bus
> +	 * by accessing the mapped memory. In all other cases we use
> +	 * IP commands to access the flash.
> +	 */
> +	if (op->data.nbytes > (q->devtype_data->rxfifo - 4) &&
> +	    op->data.dir == SPI_MEM_DATA_IN) {
> +		fsl_qspi_read_ahb(q, op);
> +	} else {
> +		qspi_writel(q,
> +			    QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
> +			    base + QUADSPI_RBCT);
> +
> +		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
> +			fsl_qspi_fill_txfifo(q, op);
> +
> +		err = fsl_qspi_do_op(q, op);
> +	}
> +
> +	mutex_unlock(&q->lock);
> +
> +	return err;
> +}

[...]

> 
> Also we should add more debug print messages under dev_dbg() like in func like fsl_qspi_prepare_lut() etc.
> 

Would you mind giving more details about where you'd like these traces to be placed exactly and what information you'd like to display?

Thanks,

Boris
Boris Brezillon June 11, 2018, 7:46 a.m. | #11
Hi Yogesh,

On Mon, 11 Jun 2018 06:31:00 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> 
> > 
> > Observation 2:
> > I have observed data sanity issue after performing read/write 
> > operations using MTD interface. Explained below
> > 
> > root:~# mtd_debug erase /dev/mtd0 0x1000000 0x40000
> > Erased 262144 bytes from address 0x01000000 in flash                      --> Erase at address 0x1000000 of erase size 0x40000
> > root:~# mtd_debug read /dev/mtd0 0x0 0x100 rp
> > Copied 256 bytes from address 0x00000000 in flash to rp                   --> Read 0x100 bytes from flash from address 0x0 in file rp
> > root:~# mtd_debug write /dev/mtd0 0x1000000 0x100 rp
> > Copied 256 bytes from rp to address 0x01000000 in flash                   --> Write 0x100 bytes to flash address 0x1000000 from file rp
> > root:~# mtd_debug read /dev/mtd0 0x1000000 0x100 wp
> > Copied 256 bytes from address 0x01000000 in flash to wp                  --> Read 0x100 bytes from flash from address 0x1000000 in file wp
> > root:~# diff rp wp                                                                                           --> compare both rp and wp files, if they are different output comes on console stating file are different
> > Files rp and wp differ
> > root:~# hexdump wp
> > 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> > 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> > 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> > 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> > 0000040 ffff ffff ffff ffff ffff ffff ffff ffff
> > *
> > 0000100
> > root:~# hexdump rp
> > 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> > 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> > 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> > 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> > 0000040 2403 0000 0000 0000 0000 0000 0000 0000
> > 0000050 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > 0000070 0011 0000 09e7 0000 0000 4411 9555 0050
> > 0000080 0000 0000 0000 0000 f9bc afa1 0404 31e0
> > 0000090 0000 0000 0400 31e0 0000 2010 08dc 31eb
> > 00000a0 2880 0050 1300 31eb 4e20 8010 0000 80ff
> > 00000b0 0000 0000 beef dead beef dead beef dead
> > 00000c0 beef dead beef dead beef dead beef dead
> > *
> > 0000100
> > root:~#
> > 
> > In hexdump output of the file which being read from address 0x1000000,wp, it can be observed that only first 64 bytes (0x40) are written on the flash.
> > 
> > Observation 3:
> > As we can support JFFS2 filesystem on NOR flash, so we can expect JFFS2 commands should work fine on NOR flash.
> > But with this driver change my mount command is not working.
> > 
> > In my target there are 2 flash slave devices connected, and I have given argument to create MTD partition like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) " for 2nd flash.
> > Below is output for /proc/mtd commands
> >     root@ls1012ardb:~# cat /proc/mtd
> >     dev:    size   erasesize  name
> >     mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
> >     mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
> >     mtd2: 00a00000 00040000 "test"
> >     mtd3: 02e00000 00040000 "rootfs"
> > 
> >     root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
> >     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
> >     Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
> >     Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
> >     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
> > 
> > This command didn't finish successfully and there are lot of messages coming on console mentioning failure in jffs2_scan_eraseblock()
> >     [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0000: 0x2886 instead
> >     [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
> >     [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask 
> > 0x1985 not found at 0x013c0008: 0xb10f instead
> > 
> > If I remove this patch series and check with older implementation, JFFS2 mounting is working fine.  
> 
> Problems 2 and 3 should definitely be fixed. That's weird because I remember that Frieder tested the new driver with a NOR chip, maybe not with JFFS2 though.
> 
> For write issue, it would be happening due to the changes pushed in spi-mem framework.

Now I understand why Frieder didn't face this issue: he was testing on
an imx6 which has a 512-byte TX FIFO, while you're probably testing on
a Vybrid or Layerscape platform which only has a 64-byte TX FIFO.

I think it's time to accept having partial page writes. This has come
up several times (last time was [1]) and it looks like the fsl quadspi
driver was already doing this sort of things (well hidden in the probe
path [2] :-)).

Marek, any comment on that?

Regards,

Boris

[1]https://patchwork.ozlabs.org/patch/905507/
[2]https://elixir.bootlin.com/linux/v4.17/source/drivers/mtd/spi-nor/fsl-quadspi.c#L1106
Yogesh Narayan Gaur June 11, 2018, 9:38 a.m. | #12
Hi Boris,

-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Monday, June 11, 2018 1:16 PM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>; marek.vasut@gmail.com
Cc: Frieder Schrempf <frieder.schrempf@exceet.de>; linux-mtd@lists.infradead.org; linux-spi@vger.kernel.org; dwmw2@infradead.org; computersforpeace@gmail.com; richard@nod.at; miquel.raynal@bootlin.com; broonie@kernel.org; David Wolfe <david.wolfe@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

Hi Yogesh,

On Mon, 11 Jun 2018 06:31:00 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> 
> > 
> > Observation 2:
> > I have observed data sanity issue after performing read/write 
> > operations using MTD interface. Explained below
> > 
> > root:~# mtd_debug erase /dev/mtd0 0x1000000 0x40000
> > Erased 262144 bytes from address 0x01000000 in flash                      --> Erase at address 0x1000000 of erase size 0x40000
> > root:~# mtd_debug read /dev/mtd0 0x0 0x100 rp
> > Copied 256 bytes from address 0x00000000 in flash to rp                   --> Read 0x100 bytes from flash from address 0x0 in file rp
> > root:~# mtd_debug write /dev/mtd0 0x1000000 0x100 rp
> > Copied 256 bytes from rp to address 0x01000000 in flash                   --> Write 0x100 bytes to flash address 0x1000000 from file rp
> > root:~# mtd_debug read /dev/mtd0 0x1000000 0x100 wp
> > Copied 256 bytes from address 0x01000000 in flash to wp                  --> Read 0x100 bytes from flash from address 0x1000000 in file wp
> > root:~# diff rp wp                                                                                           --> compare both rp and wp files, if they are different output comes on console stating file are different
> > Files rp and wp differ
> > root:~# hexdump wp
> > 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> > 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> > 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> > 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> > 0000040 ffff ffff ffff ffff ffff ffff ffff ffff
> > *
> > 0000100
> > root:~# hexdump rp
> > 0000000 aa55 aa55 0000 8010 541c 4000 0040 0000
> > 0000010 0000 0000 0000 0000 0000 0000 0000 000a
> > 0000020 0000 0030 0000 0000 11a0 00a0 2580 0000
> > 0000030 0000 0000 0040 0000 005b 0000 0000 0000
> > 0000040 2403 0000 0000 0000 0000 0000 0000 0000
> > 0000050 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > 0000070 0011 0000 09e7 0000 0000 4411 9555 0050
> > 0000080 0000 0000 0000 0000 f9bc afa1 0404 31e0
> > 0000090 0000 0000 0400 31e0 0000 2010 08dc 31eb
> > 00000a0 2880 0050 1300 31eb 4e20 8010 0000 80ff
> > 00000b0 0000 0000 beef dead beef dead beef dead
> > 00000c0 beef dead beef dead beef dead beef dead
> > *
> > 0000100
> > root:~#
> > 
> > In hexdump output of the file which being read from address 0x1000000,wp, it can be observed that only first 64 bytes (0x40) are written on the flash.
> > 
> > Observation 3:
> > As we can support JFFS2 filesystem on NOR flash, so we can expect JFFS2 commands should work fine on NOR flash.
> > But with this driver change my mount command is not working.
> > 
> > In my target there are 2 flash slave devices connected, and I have given argument to create MTD partition like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) " for 2nd flash.
> > Below is output for /proc/mtd commands
> >     root@ls1012ardb:~# cat /proc/mtd
> >     dev:    size   erasesize  name
> >     mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
> >     mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
> >     mtd2: 00a00000 00040000 "test"
> >     mtd3: 02e00000 00040000 "rootfs"
> > 
> >     root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
> >     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
> >     Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
> >     Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
> >     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
> > 
> > This command didn't finish successfully and there are lot of messages coming on console mentioning failure in jffs2_scan_eraseblock()
> >     [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0000: 0x2886 instead
> >     [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
> >     [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask
> > 0x1985 not found at 0x013c0008: 0xb10f instead
> > 
> > If I remove this patch series and check with older implementation, JFFS2 mounting is working fine.  
> 
> Problems 2 and 3 should definitely be fixed. That's weird because I remember that Frieder tested the new driver with a NOR chip, maybe not with JFFS2 though.
> 
> For write issue, it would be happening due to the changes pushed in spi-mem framework.

Now I understand why Frieder didn't face this issue: he was testing on an imx6 which has a 512-byte TX FIFO, while you're probably testing on a Vybrid or Layerscape platform which only has a 64-byte TX FIFO.

I think it's time to accept having partial page writes. This has come up several times (last time was [1]) and it looks like the fsl quadspi driver was already doing this sort of things (well hidden in the probe path [2] :-)).

Marek, any comment on that?

Regards,

Boris

[1]https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.ozlabs.org%2Fpatch%2F905507%2F&data=02%7C01%7Cyogeshnarayan.gaur%40nxp.com%7C6f2e208553754619956f08d5cf6f71f6%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C636642999952927107&sdata=GrexQ%2FjjJVU282cKr4CuVnYg5NvBL9ZZDFeIcBSBB6k%3D&reserved=0
[2]https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv4.17%2Fsource%2Fdrivers%2Fmtd%2Fspi-nor%2Ffsl-quadspi.c%23L1106&data=02%7C01%7Cyogeshnarayan.gaur%40nxp.com%7C6f2e208553754619956f08d5cf6f71f6%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C636642999952927107&sdata=kIrwvaYA4RrhghhNx6iXsGcEE2j2KY%2BhMJdRRIuu8vo%3D&reserved=0

I have sent the patch [1], based on the shared patch, for review; this patch is based on the git tree [2].
With this change, my write starts working for requested data sizes bigger than the TX FIFO size, but JFFS2 mounting is still failing.

--
Regards
Yogesh Gaur.

[1] https://patchwork.ozlabs.org/patch/927587/
[2] https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git/log/?h=for-4.18
Boris Brezillon June 11, 2018, 10:16 a.m. | #13
On Mon, 11 Jun 2018 09:38:14 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> > > Observation 3:
> > > As we can support JFFS2 filesystem on NOR flash, so we can expect JFFS2 commands should work fine on NOR flash.
> > > But with this driver change my mount command is not working.
> > > 
> > > In my target there are 2 flash slave devices connected, and I have given argument to create MTD partition like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) " for 2nd flash.
> > > Below is output for /proc/mtd commands
> > >     root@ls1012ardb:~# cat /proc/mtd
> > >     dev:    size   erasesize  name
> > >     mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
> > >     mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
> > >     mtd2: 00a00000 00040000 "test"
> > >     mtd3: 02e00000 00040000 "rootfs"

When I do mtd1 + mtd2 + mtd3, I end up with 0x3d00000 instead of
0x4000000. Is that normal? Do you reserve a bit of space at the end or
is it that rcw is not starting at 0?

> > > 
> > >     root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
> > >     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
> > >     Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
> > >     Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
> > >     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
> > > 
> > > This command didn't finish successfully and there are lot of messages coming on console mentioning failure in jffs2_scan_eraseblock()
> > >     [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0000: 0x2886 instead

Did you try to create a smaller partition? Maybe we have a problem when
accessing addresses higher than X with the new driver (X to be
determined).

> > >     [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
> > >     [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask
> > > 0x1985 not found at 0x013c0008: 0xb10f instead
> > >
Yogesh Narayan Gaur June 11, 2018, 10:21 a.m. | #14
Hi Boris,

-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Monday, June 11, 2018 3:46 PM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Cc: marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; linux-mtd@lists.infradead.org; linux-spi@vger.kernel.org; dwmw2@infradead.org; computersforpeace@gmail.com; richard@nod.at; miquel.raynal@bootlin.com; broonie@kernel.org; David Wolfe <david.wolfe@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

On Mon, 11 Jun 2018 09:38:14 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> > > Observation 3:
> > > As we can support JFFS2 filesystem on NOR flash, so we can expect JFFS2 commands should work fine on NOR flash.
> > > But with this driver change my mount command is not working.
> > > 
> > > In my target there are 2 flash slave devices connected, and I have given argument to create MTD partition like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) " for 2nd flash.
> > > Below is output for /proc/mtd commands
> > >     root@ls1012ardb:~# cat /proc/mtd
> > >     dev:    size   erasesize  name
> > >     mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
> > >     mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
> > >     mtd2: 00a00000 00040000 "test"
> > >     mtd3: 02e00000 00040000 "rootfs"

When I do mtd1 + mtd2 + mtd3, I end up with 0x3d00000 instead of 0x4000000. Is that normal? Do you reserve a bit of space at the end or is it that rcw is not starting at 0?

I have given the partition sizes in bootargs as mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs)
5 + 10 + 46 ==> 61M i.e. 0x3d00000.
I have just reserved a bit of space at the end; we can modify these settings as well.

> > > 
> > >     root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
> > >     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
> > >     Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
> > >     Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
> > >     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
> > > 
> > > This command didn't finish successfully and there are lot of messages coming on console mentioning failure in jffs2_scan_eraseblock()
> > >     [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 
> > > 0x1985 not found at 0x013c0000: 0x2886 instead

Did you try to create a smaller partition? Maybe we have a problem when accessing addresses higher than X with the new driver (X to be determined).

Would try and update you.

--
Regards
Yogesh Gaur

> > >     [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
> > >     [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask
> > > 0x1985 not found at 0x013c0008: 0xb10f instead
> > >
Yogesh Narayan Gaur June 12, 2018, 6:42 a.m. | #15
Hi Boris,

-----Original Message-----
From: linux-mtd [mailto:linux-mtd-bounces@lists.infradead.org] On Behalf Of Yogesh Narayan Gaur
Sent: Monday, June 11, 2018 3:51 PM
To: Boris Brezillon <boris.brezillon@bootlin.com>
Cc: richard@nod.at; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org; linux-spi@vger.kernel.org; marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; broonie@kernel.org; linux-mtd@lists.infradead.org; miquel.raynal@bootlin.com; Fabio Estevam <fabio.estevam@nxp.com>; David Wolfe <david.wolfe@nxp.com>; computersforpeace@gmail.com; dwmw2@infradead.org
Subject: RE: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

Hi Boris,

-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Monday, June 11, 2018 3:46 PM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Cc: marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; linux-mtd@lists.infradead.org; linux-spi@vger.kernel.org; dwmw2@infradead.org; computersforpeace@gmail.com; richard@nod.at; miquel.raynal@bootlin.com; broonie@kernel.org; David Wolfe <david.wolfe@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

On Mon, 11 Jun 2018 09:38:14 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> > > Observation 3:
> > > As we can support JFFS2 filesystem on NOR flash, so we can expect JFFS2 commands should work fine on NOR flash.
> > > But with this driver change my mount command is not working.
> > > 
> > > In my target there are 2 flash slave devices connected, and I have given argument to create MTD partition like "mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs) " for 2nd flash.
> > > Below is output for /proc/mtd commands
> > >     root@ls1012ardb:~# cat /proc/mtd
> > >     dev:    size   erasesize  name
> > >     mtd0: 04000000 00040000 "20c0000.quadspi-0"   --> First 64MB flash
> > >     mtd1: 00500000 00040000 "rcw"                               --> Second 64 MB flash device, 3 MTD partition are created for it.
> > >     mtd2: 00a00000 00040000 "test"
> > >     mtd3: 02e00000 00040000 "rootfs"

When I do mtd1 + mtd2 + mtd3, I end up with 0x3d00000 instead of 0x4000000. Is that normal? Do you reserve a bit of space at the end or is it that rcw is not starting at 0?

I have given partition size n bootargs as mtdparts=20c0000.quadspi-1:5M(rcw),10M(test),46M(rootfs)
5 + 10 + 46 ==> 61M i.e. 0x3d00000.
I have just reserve the bit at the end, we can modify these settings also.

> > > 
> > >     root@ls1012ardb:~# mkdir /media/ram ; flash_eraseall /dev/mtd3
> > >     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
> > >     Erasing 256 Kibyte @ 0 --  0 % complete [   18.299929] random: crng init done
> > >     Erasing 256 Kibyte @ 2dc0000 -- 100 % complete
> > >     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
> > > 
> > > This command didn't finish successfully and there are lot of messages coming on console mentioning failure in jffs2_scan_eraseblock()
> > >     [  187.118677] jffs2: jffs2_scan_eraseblock(): Magic bitmask 
> > > 0x1985 not found at 0x013c0000: 0x2886 instead

>> Did you try to create a smaller partition? Maybe we have a problem when accessing addresses higher than X with the new driver (X to be determined).

> Would try and update you.

I have tried JFFS2 mounting with a smaller partition size but I am still getting failures.
For a partition size equal to or less than 1MB, I get errors such as
    [   25.044930] jffs2: Too few erase blocks (4)
Thus, the partition size needs to be more than 1MB.

For 2MB partition size getting error message from jffs2_scan_eraseblock().
    root@ls1012ardb:~# cat /proc/mtd
    dev:    size   erasesize  name
    mtd0: 04000000 00040000 "20c0000.quadspi-0"
    mtd1: 00500000 00040000 "rcw"
    mtd2: 00a00000 00040000 "test"
    mtd3: 00200000 00040000 "rootfs"
    root@ls1012ardb:~#  mkdir /media/ram ; flash_eraseall /dev/mtd3
    flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
    Erasing 256 Kibyte @ 1c0000 -- 100 % complete
    root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
    [   26.380989] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x00000000: 0x0dd0 instead
    [   26.390509] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x0000004c: 0x7366 instead
    [   26.399999] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x00000050: 0x736c instead

--
Regards
Yogesh Gaur

> > >     [  187.128159] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x013c0004: 0x7a3b instead
> > >     [  187.137641] jffs2: jffs2_scan_eraseblock(): Magic bitmask
> > > 0x1985 not found at 0x013c0008: 0xb10f instead
> > >
Boris Brezillon June 12, 2018, 7:13 a.m. | #16
On Tue, 12 Jun 2018 06:42:42 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> I have tried JFFS2 mounting with smaller partition size but still getting failure.
> For partition size equal or less than 1MB, getting errors as
>     [   25.044930] jffs2: Too few erase blocks (4)
> Thus, need to have size more than 1MB.
> 
> For 2MB partition size getting error message from jffs2_scan_eraseblock().
>     root@ls1012ardb:~# cat /proc/mtd
>     dev:    size   erasesize  name
>     mtd0: 04000000 00040000 "20c0000.quadspi-0"
>     mtd1: 00500000 00040000 "rcw"
>     mtd2: 00a00000 00040000 "test"
>     mtd3: 00200000 00040000 "rootfs"
>     root@ls1012ardb:~#  mkdir /media/ram ; flash_eraseall /dev/mtd3
>     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
>     Erasing 256 Kibyte @ 1c0000 -- 100 % complete
>     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
>     [   26.380989] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x00000000: 0x0dd0 instead
>     [   26.390509] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x0000004c: 0x7366 instead
>     [   26.399999] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x00000050: 0x736c instead

That's weird. Can you tell me on which platform you're testing?
lsxxx or vf610? Can you dump the NOR after the erase to make sure the
memory is actually erased (filled with 0xff)?
Yogesh Narayan Gaur June 12, 2018, 8:51 a.m. | #17
Hi Boris,

-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Tuesday, June 12, 2018 12:43 PM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Cc: richard@nod.at; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org; linux-spi@vger.kernel.org; marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; broonie@kernel.org; linux-mtd@lists.infradead.org; miquel.raynal@bootlin.com; Fabio Estevam <fabio.estevam@nxp.com>; David Wolfe <david.wolfe@nxp.com>; computersforpeace@gmail.com; dwmw2@infradead.org
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

On Tue, 12 Jun 2018 06:42:42 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> I have tried JFFS2 mounting with smaller partition size but still getting failure.
> For partition size equal or less than 1MB, getting errors as
>     [   25.044930] jffs2: Too few erase blocks (4)
> Thus, need to have size more than 1MB.
> 
> For 2MB partition size getting error message from jffs2_scan_eraseblock().
>     root@ls1012ardb:~# cat /proc/mtd
>     dev:    size   erasesize  name
>     mtd0: 04000000 00040000 "20c0000.quadspi-0"
>     mtd1: 00500000 00040000 "rcw"
>     mtd2: 00a00000 00040000 "test"
>     mtd3: 00200000 00040000 "rootfs"
>     root@ls1012ardb:~#  mkdir /media/ram ; flash_eraseall /dev/mtd3
>     flash_eraseall has been replaced by `flash_erase <mtddev> 0 0`; please use it
>     Erasing 256 Kibyte @ 1c0000 -- 100 % complete
>     root@ls1012ardb:~# mount -t jffs2 /dev/mtdblock3 /media/ram/
>     [   26.380989] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x00000000: 0x0dd0 instead
>     [   26.390509] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x0000004c: 0x7366 instead
>     [   26.399999] jffs2: jffs2_scan_eraseblock(): Magic bitmask 0x1985 not found at 0x00000050: 0x736c instead

That's weird. Can you tell me on which platform you're testing?
lsxxx or vf610? Can you dump the NOR after the erase to make sure the memory is actually erased (filled with 0xff)?

I am working on an lsxxx platform. With further debugging, I found that the erase operation for the second flash device is not working properly.
This needs to be debugged in Frieder's patch.

When I create multiple partitions on the first flash device, JFFS2 mounting and booting the Linux kernel with rootfstype=jffs2 are successful.
    root@ls1012ardb:~# cat /proc/mtd
    dev:    size   erasesize  name
    mtd0: 00500000 00040000 "rcw"
    mtd1: 00a00000 00040000 "test"
    mtd2: 02e00000 00040000 "rootfs"
    mtd3: 04000000 00040000 "20c0000.quadspi-1"
In the above list, I am able to perform JFFS2 mounting on the mtd2 partition.

Below is logs of erase for both flashes:
    root@ls1012ardb:~# cat /proc/mtd
    dev:    size   erasesize  name
    mtd0: 04000000 00040000 "20c0000.quadspi-0"
    mtd1: 04000000 00040000 "20c0000.quadspi-1"
    root@ls1012ardb:~# mtd_debug erase /dev/mtd0 0x1000000 0x2000000
    Erased 33554432 bytes from address 0x01000000 in flash
    root@ls1012ardb:~#
    root@ls1012ardb:~# mtd_debug read /dev/mtd0 0x1000000 0xa00000 rp
    Copied 10485760 bytes from address 0x01000000 in flash to rp
    root@ls1012ardb:~# hexdump rp
    0000000 ffff ffff ffff ffff ffff ffff ffff ffff
    *
    0a00000
    root@ls1012ardb:~#
    root@ls1012ardb:~# mtd_debug erase /dev/mtd1 0x1000000 0x2000000
     [   25.023027] random: crng init done
    Erased 33554432 bytes from address 0x01000000 in flash
    root@ls1012ardb:~# mtd_debug read /dev/mtd1 0x1000000 0xa00000 rp
    Copied 10485760 bytes from address 0x01000000 in flash to rp
    root@ls1012ardb:~#
    root@ls1012ardb:~# hexdump rp
    0000000 1985 2003 000c 0000 b0b1 e41e ffff ffff
    0000010 ffff ffff ffff ffff ffff ffff ffff ffff
    *
    0040000 1985 2003 000c 0000 b0b1 e41e ffff ffff
    0040010 ffff ffff ffff ffff ffff ffff ffff ffff

--
Yogesh Gaur
Boris Brezillon June 15, 2018, 12:50 p.m. | #18
On Tue, 12 Jun 2018 08:51:25 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> 
> I am working on lsxxx platform. With further debugging, I found that my erase operation for second flash device is not working properly.
> Need to have debugging for this in Frieder Patch.

Did you find the problem? Could it be a wrong "reg = <>" definition in
your DT (Frieder changed the CS numbering scheme in the new driver)?
Yogesh Narayan Gaur June 15, 2018, 1:42 p.m. | #19
Hi Boris,

I am still debugging the issue.
With some analysis, I was able to see that proper values are not being written to the QUADSPI_SFA2AD/QUADSPI_SFB1AD/QUADSPI_SFB2AD registers.

In the current code, the value of map_addr is assigned to these registers.
             map_addr = q->memmap_phy +
                        2 * q->devtype_data->ahb_buf_size;

     qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));

But instead of "q->devtype_data->ahb_buf_size" it should be flash size. 
For my case flash size is 0x4000000 and with this hard coded value I am able to perform Write and Erase operation.
One more change I had to make is adding the flash size when writing the base address to the SFAR register for the case when "mem->spi->chip_select == 1"
	qspi_writel(q, q->memmap_phy + 0x4000000, base + QUADSPI_SFAR);

Thus, there should be a mechanism or an entry in the structure where we can have the information about the size of the connected slave device.

With both of the above hardcoded changes, I am able to perform Write and Erase operations on my second flash device but am still facing an issue in the Read operation; debugging is in progress for that.

--
Regards
Yogesh Gaur


-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Friday, June 15, 2018 6:20 PM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Cc: richard@nod.at; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org; linux-spi@vger.kernel.org; marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; broonie@kernel.org; linux-mtd@lists.infradead.org; miquel.raynal@bootlin.com; Fabio Estevam <fabio.estevam@nxp.com>; David Wolfe <david.wolfe@nxp.com>; computersforpeace@gmail.com; dwmw2@infradead.org
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

On Tue, 12 Jun 2018 08:51:25 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> 
> I am working on lsxxx platform. With further debugging, I found that my erase operation for second flash device is not working properly.
> Need to have debugging for this in Frieder Patch.

Did you find the problem? Could it be a wrong "reg = <>" definition in your DT (Frieder changed the CS numbering scheme in the new driver)?
Boris Brezillon June 15, 2018, 1:55 p.m. | #20
On Fri, 15 Jun 2018 13:42:12 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Boris,
> 
> I am still debugging the issue.
> With some analysis, able to check that proper values are not being written for QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD register.
> 
> In current code, value of map_addr are being assigned to these register.
>              map_addr = q->memmap_phy +
>                         2 * q->devtype_data->ahb_buf_size;
> 
>      qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
> 
> But instead of "q->devtype_data->ahb_buf_size" it should be flash size.

No, because we're only using 2 * ->ahb_buf_size in the direct mapping
for each device, and we're modifying the mapping dynamically based on
the selected device. Maybe we got the logic wrong though.

> For my case flash size is 0x4000000 and with this hard coded value I am able to perform Write and Erase operation.
> One more change, I have to do is adding the flash_size when writing the base_address in SFAR register for case when "mem->spi->chip_select == 1"
> 	qspi_writel(q, q->memmap_phy + 0x4000000, base + QUADSPI_SFAR);

I don't want to expose the full device in the direct mapping yet
(that's part of the direct-mapping API I posted here [1]). What this
version of the driver does is map only 2 times the ahb_size so that we
can bypass the internal cache of the QSPI engine.

> 
> Thus, there should be mechanism or the entry in structure where we can have the information of the size of the connected slave device.

Because that's exactly the kind of thing I'd like to avoid. What if the
device is bigger than the reserved memory region? What if the sum of
all devices does not fit in there? Here I tried to support all cases by
just mapping the portion of memory we need.
Boris Brezillon June 15, 2018, 1:58 p.m. | #21
On Fri, 15 Jun 2018 15:55:41 +0200
Boris Brezillon <boris.brezillon@bootlin.com> wrote:

> On Fri, 15 Jun 2018 13:42:12 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
> > Hi Boris,
> > 
> > I am still debugging the issue.
> > With some analysis, able to check that proper values are not being written for QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD register.
> > 
> > In current code, value of map_addr are being assigned to these register.
> >              map_addr = q->memmap_phy +
> >                         2 * q->devtype_data->ahb_buf_size;
> > 
> >      qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
> > 
> > But instead of "q->devtype_data->ahb_buf_size" it should be flash size.  
> 
> No, because we're only using 2 * ->ahb_buf_size in the direct mapping
> for each device, and we're modifying the mapping dynamically based on
> the selected device. Maybe we got the logic wrong though.
> 
> > For my case flash size is 0x4000000 and with this hard coded value I am able to perform Write and Erase operation.
> > One more change, I have to do is adding the flash_size when writing the base_address in SFAR register for case when "mem->spi->chip_select == 1"
> > 	qspi_writel(q, q->memmap_phy + 0x4000000, base + QUADSPI_SFAR);  
> 
> I don't want to expose the full device in the direct mapping yet
> (that's part of the direct-mapping API I posted here [1]). What this
> version of the driver does is, map only 2 time the ahb_size so that we
> can bypass the internal cache of the QSPI engine.

Oops, forgot to add the link.

[1]http://lists.infradead.org/pipermail/linux-mtd/2018-June/081460.html
Yogesh Narayan Gaur June 18, 2018, 1:32 p.m. | #22
-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Friday, June 15, 2018 7:26 PM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; David Wolfe <david.wolfe@nxp.com>; dwmw2@infradead.org
Cc: richard@nod.at; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org; linux-spi@vger.kernel.org; marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; broonie@kernel.org; linux-mtd@lists.infradead.org; miquel.raynal@bootlin.com; computersforpeace@gmail.com
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

On Fri, 15 Jun 2018 13:42:12 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Hi Boris,
> 
> I am still debugging the issue.
> With some analysis, able to check that proper values are not being written for QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD register.
> 
> In current code, value of map_addr are being assigned to these register.
>              map_addr = q->memmap_phy +
>                         2 * q->devtype_data->ahb_buf_size;
> 
>      qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
> 
> But instead of "q->devtype_data->ahb_buf_size" it should be flash size.

No, because we're only using 2 * ->ahb_buf_size in the direct mapping for each device, and we're modifying the mapping dynamically based on the selected device. Maybe we got the logic wrong though.

Yes, for register QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD, we need to save starting actual address from where this flash is getting started.
Thus, if my first flash size is 64MB, then register QUADSPI_SFA2AD would have value of q->memmap_phy + 0x4000000 i.e. (QUADSPI_SFA1AD + sizeof First Flash)
If the second flash is of size 32MB, then register QUADSPI_SFB1AD would have the value of QUADSPI_SFA2AD + sizeof second flash.

> For my case flash size is 0x4000000 and with this hard coded value I am able to perform Write and Erase operation.
> One more change, I have to do is adding the flash_size when writing the base_address in SFAR register for case when "mem->spi->chip_select == 1"
> 	qspi_writel(q, q->memmap_phy + 0x4000000, base + QUADSPI_SFAR);

I don't want to expose the full device in the direct mapping yet (that's part of the direct-mapping API I posted here [1]). What this version of the driver does is, map only 2 time the ahb_size so that we can bypass the internal cache of the QSPI engine.

To perform any operation on the second flash, its base address must be written to the SFAR register for that particular operation.
Exposing only 2 time of ahb_size is design decision but value in SFAR register should be correct.

> 
> Thus, there should be mechanism or the entry in structure where we can have the information of the size of the connected slave device.

Because that's exactly the kind of thing I'd like to avoid. What if the device is bigger than the reserved memory region? What if the sum of all devices does not fit in there? Here I tried to support all cases by just mapping the portion of memory we need.

So IMO, there should be a mechanism to provide the start address of each slave device. This could be done from the DTS entry of each slave device connected to the controller.
Boris Brezillon June 18, 2018, 7:15 p.m. | #23
Hi Yogesh,

On Mon, 18 Jun 2018 13:32:27 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> -----Original Message-----
> From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
> Sent: Friday, June 15, 2018 7:26 PM
> To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>; Fabio Estevam <fabio.estevam@nxp.com>; David Wolfe <david.wolfe@nxp.com>; dwmw2@infradead.org
> Cc: richard@nod.at; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org; linux-spi@vger.kernel.org; marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; broonie@kernel.org; linux-mtd@lists.infradead.org; miquel.raynal@bootlin.com; computersforpeace@gmail.com
> Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller
> 
> On Fri, 15 Jun 2018 13:42:12 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
> > Hi Boris,
> > 
> > I am still debugging the issue.
> > With some analysis, able to check that proper values are not being written for QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD register.
> > 
> > In current code, value of map_addr are being assigned to these register.
> >              map_addr = q->memmap_phy +
> >                         2 * q->devtype_data->ahb_buf_size;
> > 
> >      qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
> > 
> > But instead of "q->devtype_data->ahb_buf_size" it should be flash size.  
> 
> No, because we're only using 2 * ->ahb_buf_size in the direct mapping for each device, and we're modifying the mapping dynamically based on the selected device. Maybe we got the logic wrong though.
> 
> Yes, for register QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD, we need to save starting actual address from where this flash is getting started.
> Thus, if my first flash size is 64MB, then register QUADSPI_SFA2AD would have value of q->memmap_phy + 0x4000000 i.e. (QUADSPI_SFA1AD + sizeof First Flash)
> If second flash is of size 32MB, then register QUADSPI_SFB1AD would have value of value of QUADSPI_SFA2AD + sizeof second flash.

Again, no, that's not what I'm trying to do, and the fact that it
worked fine with CS0 makes me think you don't need to map the whole
device to get it to work, just 2 * ->ahb_buf_size per device.

> 
> > For my case flash size is 0x4000000 and with this hard coded value I am able to perform Write and Erase operation.
> > One more change, I have to do is adding the flash_size when writing the base_address in SFAR register for case when "mem->spi->chip_select == 1"
> > 	qspi_writel(q, q->memmap_phy + 0x4000000, base + QUADSPI_SFAR);  
> 
> I don't want to expose the full device in the direct mapping yet (that's part of the direct-mapping API I posted here [1]). What this version of the driver does is, map only 2 time the ahb_size so that we can bypass the internal cache of the QSPI engine.
> 
> To perform any operation on second flash, we need to provide it's base address should be saved in SFAR register for this particular operation.

That's what we tried to do, we tried to make all CS start at 0 when they
are used and declare unused CS as having a size of 0.

So, say you're trying to access CS1, you should have the following
ranges:

CS0: 0 -> 0 (size = 0)
CS1: 0 -> 2 * ->ahb_buf_size (size = 2 * ->ahb_buf_size)
CS2: 2 * ->ahb_buf_size -> 2 * ->ahb_buf_size (size = 0)
CS3: 2 * ->ahb_buf_size -> 2 * ->ahb_buf_size (size = 0)

now, if you're trying to access CS3:

CS0: 0 -> 0 (size = 0)
CS1: 0 -> 0 (size = 0)
CS2: 0 -> 0 (size = 0)
CS3: 0 -> 2 * ->ahb_buf_size (size = 2 * ->ahb_buf_size)

maybe this approach does not work, but that's not clearly stated as
'not supported' in the datasheet.

> Exposing only 2 time of ahb_size is design decision but value in SFAR register should be correct.
> 
> > 
> > Thus, there should be mechanism or the entry in structure where we can have the information of the size of the connected slave device.  
> 
> Because that's exactly the kind of thing I'd like to avoid. What if the device is bigger than the reserved memory region? What if the sum of all devices does not fit in there? Here I tried to support all cases by just mapping the portion of memory we need.
> 
> So IMO, there should be mechanism to have value of start address of each slave device. This might can be done from DTS entry of each slave device connected to the controller.

Let's not put that in the DT. If we really can't re-use 0 as the start
address and make some ranges 0 in size, then let's reserve 2 *
->ahb_buf_size per chip, and be done with it.

This should leave us enough space in the AHB mem range to then support
temporary direct mappings through the direct mapping API.

Regards,

Boris
Boris Brezillon June 18, 2018, 7:27 p.m. | #24
Yogesh,

On Wed, 30 May 2018 15:14:32 +0200
Frieder Schrempf <frieder.schrempf@exceet.de> wrote:

> +static void fsl_qspi_select_mem(struct fsl_qspi *q, struct spi_device *spi)
> +{
> +	unsigned long rate = spi->max_speed_hz;
> +	int ret, i;
> +	u32 map_addr;
> +
> +	if (q->selected == spi->chip_select)
> +		return;
> +
> +	/*
> +	 * In HW there can be a maximum of four chips on two buses with
> +	 * two chip selects on each bus. We use four chip selects in SW
> +	 * to differentiate between the four chips.
> +	 * We use the SFA1AD, SFA2AD, SFB1AD, SFB2AD registers to select
> +	 * the chip we want to access.
> +	 */
> +	for (i = 0; i < 4; i++) {
> +		if (i < spi->chip_select)

Can you try with:

		if (i <= spi->chip_select)

and let me know if it fixes the problem you have when CS != 0?

> +			map_addr = q->memmap_phy;
> +		else
> +			map_addr = q->memmap_phy +
> +				   2 * q->devtype_data->ahb_buf_size;
> +
> +		qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
> +	}
> +
> +	if (needs_4x_clock(q))
> +		rate *= 4;
> +
> +	fsl_qspi_clk_disable_unprep(q);
> +
> +	ret = clk_set_rate(q->clk, rate);
> +	if (ret)
> +		return;
> +
> +	ret = fsl_qspi_clk_prep_enable(q);
> +	if (ret)
> +		return;
> +
> +	q->selected = spi->chip_select;
> +}
Yogesh Narayan Gaur June 19, 2018, 7:10 a.m. | #25
Hi Boris,

-----Original Message-----
From: Boris Brezillon [mailto:boris.brezillon@bootlin.com] 
Sent: Tuesday, June 19, 2018 12:46 AM
To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Cc: Fabio Estevam <fabio.estevam@nxp.com>; David Wolfe <david.wolfe@nxp.com>; dwmw2@infradead.org; richard@nod.at; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org; linux-spi@vger.kernel.org; marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>; broonie@kernel.org; linux-mtd@lists.infradead.org; miquel.raynal@bootlin.com; computersforpeace@gmail.com
Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI controller

Hi Yogesh,

On Mon, 18 Jun 2018 13:32:27 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> -----Original Message-----
> From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> Sent: Friday, June 15, 2018 7:26 PM
> To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>; Fabio Estevam 
> <fabio.estevam@nxp.com>; David Wolfe <david.wolfe@nxp.com>; 
> dwmw2@infradead.org
> Cc: richard@nod.at; Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>; 
> Han Xu <han.xu@nxp.com>; linux-kernel@vger.kernel.org; 
> linux-spi@vger.kernel.org; marek.vasut@gmail.com; Frieder Schrempf 
> <frieder.schrempf@exceet.de>; broonie@kernel.org; 
> linux-mtd@lists.infradead.org; miquel.raynal@bootlin.com; 
> computersforpeace@gmail.com
> Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP 
> QuadSPI controller
> 
> On Fri, 15 Jun 2018 13:42:12 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
> > Hi Boris,
> > 
> > I am still debugging the issue.
> > With some analysis, able to check that proper values are not being written for QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD register.
> > 
> > In current code, value of map_addr are being assigned to these register.
> >              map_addr = q->memmap_phy +
> >                         2 * q->devtype_data->ahb_buf_size;
> > 
> >      qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
> > 
> > But instead of "q->devtype_data->ahb_buf_size" it should be flash size.  
> 
> No, because we're only using 2 * ->ahb_buf_size in the direct mapping for each device, and we're modifying the mapping dynamically based on the selected device. Maybe we got the logic wrong though.
> 
> Yes, for register QUADSPI_SFA2AD/ QUADSPI_SFB1AD/ QUADSPI_SFB2AD, we need to save starting actual address from where this flash is getting started.
> Thus, if my first flash size is 64MB, then register QUADSPI_SFA2AD 
> would have value of q->memmap_phy + 0x4000000 i.e. (QUADSPI_SFA1AD + sizeof First Flash) If second flash is of size 32MB, then register QUADSPI_SFB1AD would have value of value of QUADSPI_SFA2AD + sizeof second flash.

Again, no, that's not what I'm trying to do, and the fact that it worked fine with CS0 makes me think you don't need to map the whole device to get it to work, just 2 * ->ahb_buf_size per device.

> 
> > For my case flash size is 0x4000000 and with this hard coded value I am able to perform Write and Erase operation.
> > One more change, I have to do is adding the flash_size when writing the base_address in SFAR register for case when "mem->spi->chip_select == 1"
> > 	qspi_writel(q, q->memmap_phy + 0x4000000, base + QUADSPI_SFAR);
> 
> I don't want to expose the full device in the direct mapping yet (that's part of the direct-mapping API I posted here [1]). What this version of the driver does is, map only 2 time the ahb_size so that we can bypass the internal cache of the QSPI engine.
> 
> To perform any operation on second flash, we need to provide it's base address should be saved in SFAR register for this particular operation.

That's what we tried to do, we tried to make all CS start at 0 when they are used and declare unused CS at having a size of 0.

So, say you're trying to access CS1, you should have the following
ranges:

CS0: 0 -> 0 (size = 0)
CS1: 0 -> 2 * ->ahb_buf_size (size = 2 * ->ahb_buf_size)
CS2: 2 * ->ahb_buf_size -> 2 * ->ahb_buf_size (size = 0)
CS3: 2 * ->ahb_buf_size -> 2 * ->ahb_buf_size (size = 0)

now, if you're trying to access CS3:

CS0: 0 -> 0 (size = 0)
CS1: 0 -> 0 (size = 0)
CS2: 0 -> 0 (size = 0)
CS3: 0 -> 2 * ->ahb_buf_size (size = 2 * ->ahb_buf_size)

maybe this approach does not work, but that's not clearly stated as 'not supported' in the datasheet.

> Exposing only 2 time of ahb_size is design decision but value in SFAR register should be correct.
> 
> > 
> > Thus, there should be mechanism or the entry in structure where we can have the information of the size of the connected slave device.  
> 
> Because that's exactly the kind of thing I'd like to avoid. What if the device is bigger than the reserved memory region? What if the sum of all devices does not fit in there? Here I tried to support all cases by just mapping the portion of memory we need.
> 
> So IMO, there should be mechanism to have value of start address of each slave device. This might can be done from DTS entry of each slave device connected to the controller.

Let's not put that in the DT. If we really can't re-use 0 as the start address and make some ranges 0 in size, then let's reserve 2 *
->ahb_buf_size per chip, and be done with it.

This should leave us enough space in the AHB mem range to then support temporary direct mappings through the direct mapping API.

Let us take below layout of memory address space map.
QuadSPI Controller can access range from 0x2000_0000 - 0x2FFF_FFFF i.e. 256 MB address space reserved and it is having 4 slave devices connected.
These slave devices[of size 64MB, 64MB, 32MB and 64MB ] are connected at below address
0x2000_0000, 0x2400_0000, 0x2A00_0000, 0x2C00_0000
i.e. there is gap of 32MB from 0x2800_0000 to 0x29FF_FFFF.

As per my understanding of the controller, flash XX top address, register should have below values:
  QUADSPI_SFA1AD - 0x0
  QUADSPI_SFA2AD - 0x400_0000
  QUADSPI_SFB1AD - 0xA00_0000
  QUADSPI_SFB2AD - 0xC00_0000
And Register QUADSPI_SFAR should point to the range for the flash in which operation is happening.

Please check Table10-32, page 1657, in [1] for more details on flash address assignment.

But say if I assign address to register QUADSPI_SFA2AD as "0 + 2 * ->ahb_buf_size" then this address value is not correct as per the value range explained in above mentioned table.

Regards
Yogesh Gaur.

Regards,

Boris

[1] https://www.nxp.com/docs/en/reference-manual/VFXXXRM.pdf
Boris Brezillon June 19, 2018, 7:28 a.m. | #26
Hi Yogesh,

Could you please use a mailer that is quoting things correctly. I have
a hard time differentiating your replies from mine.

On Tue, 19 Jun 2018 07:10:37 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> Let us take below layout of memory address space map.
> QuadSPI Controller can access range from 0x2000_0000 - 0x2FFF_FFFF i.e. 256 MB address space reserved and it is having 4 slave devices connected.
> These slave devices[of size 64MB, 64MB, 32MB and 64MB ] are connected at below address
> 0x2000_0000, 0x2400_0000, 0x2A00_0000, 0x2C00_0000
> i.e. there is gap of 32MB from 0x2800_0000 to 0x29FF_FFFF.

Okay, I'm fine with pre-reserving 32MB per chip select.

> 
> As per my understanding of the controller, flash XX top address, register should have below values:
>   QUADSPI_SFA1AD - 0x0
>   QUADSPI_SFA2AD - 0x400_0000
>   QUADSPI_SFB1AD - 0xA00_0000
>   QUADSPI_SFB2AD - 0xC00_0000
> And Register QUADSPI_SFAR should point to the range for the flash in which operation is happening.

Wait, I thought it was supposed to be an absolute address, not one
relative to the 0x20000000 offset.

> 
> Please check Table10-32, page 1657, in [1] for more details on flash address assignment.

Yes, I still don't see where it says that having one of the range with
a zero size is forbidden, or anything mentioning a required alignment.

> 
> But say if I assign address to register QUADSPI_SFA2AD as "0 + 2 * ->ahb_buf_size" then this address value is not correct as per the value range explained in above mentioned table.

Why? If the SFA1AD is set to zero, that should not, right?
Yogesh Narayan Gaur June 19, 2018, 8:31 a.m. | #27
Hi Boris,

> -----Original Message-----
> From: Boris Brezillon [mailto:boris.brezillon@bootlin.com]
> Sent: Tuesday, June 19, 2018 12:59 PM
> To: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>;
> marek.vasut@gmail.com; Frieder Schrempf <frieder.schrempf@exceet.de>;
> broonie@kernel.org
> Cc: Fabio Estevam <fabio.estevam@nxp.com>; David Wolfe
> <david.wolfe@nxp.com>; dwmw2@infradead.org; richard@nod.at; Prabhakar
> Kushwaha <prabhakar.kushwaha@nxp.com>; Han Xu <han.xu@nxp.com>; linux-
> kernel@vger.kernel.org; linux-spi@vger.kernel.org; linux-
> mtd@lists.infradead.org; miquel.raynal@bootlin.com;
> computersforpeace@gmail.com
> Subject: Re: [PATCH 03/11] spi: Add a driver for the Freescale/NXP QuadSPI
> controller
> 
> Hi Yogesh,
> 
> Could you please use a mailer that is quoting things correctly. I have a hard time
> differentiating your replies from mine.

Sorry for this, have changed my mailer settings.

> 
> On Tue, 19 Jun 2018 07:10:37 +0000
> Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> 
> > Let us take below layout of memory address space map.
> > QuadSPI Controller can access range from 0x2000_0000 - 0x2FFF_FFFF i.e. 256
> MB address space reserved and it is having 4 slave devices connected.
> > These slave devices[of size 64MB, 64MB, 32MB and 64MB ] are connected
> > at below address 0x2000_0000, 0x2400_0000, 0x2A00_0000, 0x2C00_0000
> > i.e. there is gap of 32MB from 0x2800_0000 to 0x29FF_FFFF.
> 
> Okay, I'm fine with pre-reserving 32MB per chip select.
> 
> >
> > As per my understanding of the controller, flash XX top address, register should
> have below values:
> >   QUADSPI_SFA1AD - 0x0
> >   QUADSPI_SFA2AD - 0x400_0000
> >   QUADSPI_SFB1AD - 0xA00_0000
> >   QUADSPI_SFB2AD - 0xC00_0000
> > And Register QUADSPI_SFAR should point to the range for the flash in which
> operation is happening.

My mistake, the values of these registers for the said case would be:
QUADSPI_SFA1AD - 0x400_0000
QUADSPI_SFA2AD - 0x800_0000
QUADSPI_SFB1AD - 0xC00_0000
QUADSPI_SFB2AD - 0x1000_0000

i.e. as per controller each register is having the Top address for serial flash connected at A1/A2/B1/B2 respectively.

> 
> Wait, I thought it was supposed to be an absolute address, not one relative to
> the 0x20000000 offset.
> 
> >
> > Please check Table10-32, page 1657, in [1] for more details on flash address
> assignment.
> 
> Yes, I still don't see where it says that having one of the range with a zero size is
> forbidden, or anything mentioning a required alignment.
> 
> >
> > But say if I assign address to register QUADSPI_SFA2AD as "0 + 2 * -
> >ahb_buf_size" then this address value is not correct as per the value range
> explained in above mentioned table.
> 
> Why? If the SFA1AD is set to zero, that should not, right?
What this table says is that TOP_ADDR_MEMA1 defines the top address for the flash connected at A1, and any address space between TOP_ADDR_MEMA1 and QSPI_AMBA_BASE will be routed to Serial Flash A1.
In my example case TOP_ADDR_MEMA1 is 0x400_0000.
If the value assigned to the SFAR register is "0 + 2 * ->ahb_buf_size", then it would lie in the access range of Serial Flash A1 and the access would go to the A1 flash, whereas we want to access the A2 flash.

For access of serial flash A2, any address space access between TOP_ADDR_MEMA2 and TOP_ADDR_MEMA1 would be routed to serial flash A2.
Thus to access A2 flash, SFAR would be in range from 0x400_0000 and 0x800_0000

--
Regards
Yogesh Gaur
Boris Brezillon June 19, 2018, 8:46 a.m. | #28
On Tue, 19 Jun 2018 08:31:25 +0000
Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:

> > 
> > Could you please use a mailer that is quoting things correctly. I
> > have a hard time differentiating your replies from mine.  
> 
> Sorry for this, have changed my mailer settings.

Thanks for doing so. It's still not perfect, but it's definitely better.

> 
> > 
> > On Tue, 19 Jun 2018 07:10:37 +0000
> > Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com> wrote:
> >   
> > > Let us take below layout of memory address space map.
> > > QuadSPI Controller can access range from 0x2000_0000 -
> > > 0x2FFF_FFFF i.e. 256  
> > MB address space reserved and it is having 4 slave devices
> > connected.  
> > > These slave devices[of size 64MB, 64MB, 32MB and 64MB ] are
> > > connected at below address 0x2000_0000, 0x2400_0000, 0x2A00_0000,
> > > 0x2C00_0000 i.e. there is gap of 32MB from 0x2800_0000 to
> > > 0x29FF_FFFF.  
> > 
> > Okay, I'm fine with pre-reserving 32MB per chip select.
> >   
> > >
> > > As per my understanding of the controller, flash XX top address,
> > > register should  
> > have below values:  
> > >   QUADSPI_SFA1AD - 0x0
> > >   QUADSPI_SFA2AD - 0x400_0000
> > >   QUADSPI_SFB1AD - 0xA00_0000
> > >   QUADSPI_SFB2AD - 0xC00_0000
> > > And Register QUADSPI_SFAR should point to the range for the flash
> > > in which  
> > operation is happening.  
> 
> My mistake values of these register would be for said case are:
> QUADSPI_SFA1AD - 0x400_0000
> QUADSPI_SFA2AD - 0x800_0000
> QUADSPI_SFB1AD - 0xC00_0000
> QUADSPI_SFB2AD - 0x1000_0000
> 
> i.e. as per controller each register is having the Top address for
> serial flash connected at A1/A2/B1/B2 respectively.

This is still wrong ;-). I guess you mean:

QUADSPI_SFA1AD - 0x2400_0000
QUADSPI_SFA2AD - 0x2800_0000
QUADSPI_SFB1AD - 0x2C00_0000
QUADSPI_SFB2AD - 0x3000_0000

> 
> > 
> > Wait, I thought it was supposed to be an absolute address, not one
> > relative to the 0x20000000 offset.
> >   
> > >
> > > Please check Table10-32, page 1657, in [1] for more details on
> > > flash address  
> > assignment.
> > 
> > Yes, I still don't see where it says that having one of the range
> > with a zero size is forbidden, or anything mentioning a required
> > alignment. 
> > >
> > > But say if I assign address to register QUADSPI_SFA2AD as "0 + 2
> > > * -
> > >ahb_buf_size" then this address value is not correct as per the
> > >value range  
> > explained in above mentioned table.
> > 
> > Why? If the SFA1AD is set to zero, that should not, right?  
> What this table says that for TOP_ADDR_MEMA1 defines the top address
> for flash connected at A1 and any address space between
> TOP_ADDR_MEMA1 and QSPI_AMBA_BASE will be routed to Serial Flash A1.
> In my example case TOP_ADDR_MEMA1 is 0x400_0000 If assign value to
> SFAR register is "0 + 2 * ->ahb_buf_size", then this would lie in
> access range of Serial Flash A1 and access happens to A1 flash
> whereas we want access to A2 flash.

No, not if SFA1AD is 0x20000000, because then the address range for CS0
would be 0x20000000 -> 0x20000000.

If you look at the code, you'll see that I adjust the CS mapping
dynamically, making the one being access use the range
0x20000000 -> (0x20000000 + 2 * ->ahb_buf_size) and assigning a 0-size
range for the other ones (either 0x20000000 -> 0x20000000 or
(0x20000000 + 2 * ->ahb_buf_size) -> (0x20000000 + 2 * ->ahb_buf_size))

> 
> For access of serial flash A2, any address space access between
> TOP_ADDR_MEMA2 and TOP_ADDR_MEMA1 would be routed to serial flash A2.
> Thus to access A2 flash, SFAR would be in range from 0x400_0000 and
> 0x800_0000

I understand what you're explaining, what I don't get is why the QSPI
IP doesn't cope with a 0-size range. If you have SFA1AD set to
0x20000000 and SFA2AD set to 0x20000800, I would expect any access to
the 0x20000000 -> 0x20000800 range to be routed to CS1 not CS0. But
apparently it's not working like that.

Patch

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index e62ac32..6de0df5 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -251,6 +251,17 @@  config SPI_FSL_LPSPI
 	help
 	  This enables Freescale i.MX LPSPI controllers in master mode.
 
+config SPI_FSL_QSPI
+	tristate "Freescale QSPI controller"
+	depends on ARCH_MXC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
+	depends on HAS_IOMEM
+	help
+	  This enables support for the Quad SPI controller in master mode.
+	  Up to four flash chips can be connected on two buses with two
+	  chipselects each.
+	  This controller does not support generic SPI messages. It only
+	  supports the high-level SPI memory interface.
+
 config SPI_GPIO
 	tristate "GPIO-based bitbanging SPI Master"
 	depends on GPIOLIB || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index cb1f437..a8f7fda 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -43,6 +43,7 @@  obj-$(CONFIG_SPI_FSL_DSPI)		+= spi-fsl-dspi.o
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
 obj-$(CONFIG_SPI_FSL_ESPI)		+= spi-fsl-espi.o
 obj-$(CONFIG_SPI_FSL_LPSPI)		+= spi-fsl-lpspi.o
+obj-$(CONFIG_SPI_FSL_QSPI)		+= spi-fsl-qspi.o
 obj-$(CONFIG_SPI_FSL_SPI)		+= spi-fsl-spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi-gpio.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
new file mode 100644
index 0000000..c16d070
--- /dev/null
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -0,0 +1,929 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Freescale QuadSPI driver.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ * Copyright (C) 2018 Bootlin
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ *
+ * Transition to SPI MEM interface:
+ * Author:
+ *     Boris Brezillon <boris.brezillon@bootlin.com>
+ *     Frieder Schrempf <frieder.schrempf@exceet.de>
+ *
+ * Based on the original fsl-quadspi.c spi-nor driver:
+ * Author: Freescale Semiconductor, Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/sizes.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/*
+ * The driver only uses one single LUT entry, that is updated on
+ * each call of exec_op(). Index 0 is preset at boot with a basic
+ * read operation, so let's use the last entry (15).
+ */
+#define	SEQID_LUT			15
+
+/* Registers used by the driver */
+#define QUADSPI_MCR			0x00
+#define QUADSPI_MCR_RESERVED_MASK	(0xF << 16)
+#define QUADSPI_MCR_MDIS_MASK		BIT(14)
+#define QUADSPI_MCR_CLR_TXF_MASK	BIT(11)
+#define QUADSPI_MCR_CLR_RXF_MASK	BIT(10)
+#define QUADSPI_MCR_DDR_EN_MASK		BIT(7)
+#define QUADSPI_MCR_END_CFG_MASK	(0x3 << 2)
+#define QUADSPI_MCR_SWRSTHD_MASK	BIT(1)
+#define QUADSPI_MCR_SWRSTSD_MASK	BIT(0)
+
+#define QUADSPI_IPCR			0x08
+#define QUADSPI_IPCR_SEQID_SHIFT	24
+
+#define QUADSPI_BUF3CR			0x1c
+#define QUADSPI_BUF3CR_ALLMST_MASK	BIT(31)
+#define QUADSPI_BUF3CR_ADATSZ_SHIFT	8
+#define QUADSPI_BUF3CR_ADATSZ_MASK	(0xFF << QUADSPI_BUF3CR_ADATSZ_SHIFT)
+
+#define QUADSPI_BFGENCR			0x20
+#define QUADSPI_BFGENCR_SEQID_SHIFT	12
+
+#define QUADSPI_BUF0IND			0x30
+#define QUADSPI_BUF1IND			0x34
+#define QUADSPI_BUF2IND			0x38
+#define QUADSPI_SFAR			0x100
+
+#define QUADSPI_SMPR			0x108
+#define QUADSPI_SMPR_DDRSMP_MASK	(7 << 16)
+#define QUADSPI_SMPR_FSDLY_MASK		BIT(6)
+#define QUADSPI_SMPR_FSPHS_MASK		BIT(5)
+#define QUADSPI_SMPR_HSENA_MASK		BIT(0)
+
+#define QUADSPI_RBCT			0x110
+#define QUADSPI_RBCT_WMRK_MASK		0x1F
+#define QUADSPI_RBCT_RXBRD_USEIPS	BIT(8)
+
+#define QUADSPI_TBDR			0x154
+
+#define QUADSPI_SR			0x15c
+#define QUADSPI_SR_IP_ACC_MASK		BIT(1)
+#define QUADSPI_SR_AHB_ACC_MASK		BIT(2)
+
+#define QUADSPI_FR			0x160
+#define QUADSPI_FR_TFF_MASK		BIT(0)
+
+#define QUADSPI_SPTRCLR			0x16c
+#define QUADSPI_SPTRCLR_IPPTRC		BIT(8)
+#define QUADSPI_SPTRCLR_BFPTRC		BIT(0)
+
+#define QUADSPI_SFA1AD			0x180
+#define QUADSPI_SFA2AD			0x184
+#define QUADSPI_SFB1AD			0x188
+#define QUADSPI_SFB2AD			0x18c
+#define QUADSPI_RBDR(x)			(0x200 + ((x) * 4))
+
+#define QUADSPI_LUTKEY			0x300
+#define QUADSPI_LUTKEY_VALUE		0x5AF05AF0
+
+#define QUADSPI_LCKCR			0x304
+#define QUADSPI_LCKER_LOCK		BIT(0)
+#define QUADSPI_LCKER_UNLOCK		BIT(1)
+
+#define QUADSPI_RSER			0x164
+#define QUADSPI_RSER_TFIE		BIT(0)
+
+#define QUADSPI_LUT_BASE		0x310
+#define QUADSPI_LUT_OFFSET		(SEQID_LUT * 4 * 4)
+#define QUADSPI_LUT_REG(idx)		(QUADSPI_LUT_BASE + \
+					QUADSPI_LUT_OFFSET + (idx) * 4)
+
+/* Instruction set for the LUT register */
+#define LUT_STOP		0
+#define LUT_CMD			1
+#define LUT_ADDR		2
+#define LUT_DUMMY		3
+#define LUT_MODE		4
+#define LUT_MODE2		5
+#define LUT_MODE4		6
+#define LUT_FSL_READ		7
+#define LUT_FSL_WRITE		8
+#define LUT_JMP_ON_CS		9
+#define LUT_ADDR_DDR		10
+#define LUT_MODE_DDR		11
+#define LUT_MODE2_DDR		12
+#define LUT_MODE4_DDR		13
+#define LUT_FSL_READ_DDR	14
+#define LUT_FSL_WRITE_DDR	15
+#define LUT_DATA_LEARN		16
+
+/*
+ * The PAD definitions for LUT register.
+ *
+ * The pad stands for the number of IO lines [0:3].
+ * For example, the quad read needs four IO lines,
+ * so you should use LUT_PAD(4).
+ */
+#define LUT_PAD(x) (fls(x) - 1)
+
+/*
+ * Macro for constructing the LUT entries with the following
+ * register layout:
+ *
+ *  ---------------------------------------------------
+ *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
+ *  ---------------------------------------------------
+ */
+#define LUT_DEF(idx, ins, pad, opr)					\
+	((((ins) << 10) | ((pad) << 8) | (opr)) << (((idx) % 2) * 16))
+
+/* Controller needs driver to swap endianness */
+#define QUADSPI_QUIRK_SWAP_ENDIAN	BIT(0)
+
+/* Controller needs 4x internal clock */
+#define QUADSPI_QUIRK_4X_INT_CLK	BIT(1)
+
+/*
+ * TKT253890, the controller needs the driver to fill the txfifo with
+ * 16 bytes at least to trigger a data transfer, even though the extra
+ * data won't be transferred.
+ */
+#define QUADSPI_QUIRK_TKT253890		BIT(2)
+
+/* TKT245618, the controller cannot wake up from wait mode */
+#define QUADSPI_QUIRK_TKT245618		BIT(3)
+
+enum fsl_qspi_devtype {
+	FSL_QUADSPI_VYBRID,
+	FSL_QUADSPI_IMX6SX,
+	FSL_QUADSPI_IMX7D,
+	FSL_QUADSPI_IMX6UL,
+	FSL_QUADSPI_LS1021A,
+	FSL_QUADSPI_LS2080A,
+};
+
+/*
+ * Per-SoC description of the controller, selected through the OF match
+ * table.
+ */
+struct fsl_qspi_devtype_data {
+	enum fsl_qspi_devtype devtype;
+	unsigned int rxfifo;		/* compared against read lengths in bytes */
+	unsigned int txfifo;		/* upper bound for write lengths in bytes */
+	unsigned int ahb_buf_size;	/* upper bound for AHB read lengths in bytes */
+	unsigned int quirks;		/* bitmask of QUADSPI_QUIRK_* flags */
+};
+
+static const struct fsl_qspi_devtype_data vybrid_data = {
+	.devtype = FSL_QUADSPI_VYBRID,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_SWAP_ENDIAN,
+};
+
+static const struct fsl_qspi_devtype_data imx6sx_data = {
+	.devtype = FSL_QUADSPI_IMX6SX,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_4X_INT_CLK | QUADSPI_QUIRK_TKT245618,
+};
+
+static const struct fsl_qspi_devtype_data imx7d_data = {
+	.devtype = FSL_QUADSPI_IMX7D,
+	.rxfifo = SZ_512,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK,
+};
+
+static const struct fsl_qspi_devtype_data imx6ul_data = {
+	.devtype = FSL_QUADSPI_IMX6UL,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK,
+};
+
+static const struct fsl_qspi_devtype_data ls1021a_data = {
+	.devtype = FSL_QUADSPI_LS1021A,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = 0,
+};
+
+static const struct fsl_qspi_devtype_data ls2080a_data = {
+	.devtype = FSL_QUADSPI_LS2080A,
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890,
+};
+
+/* Driver state for one QuadSPI controller instance. */
+struct fsl_qspi {
+	void __iomem *iobase;		/* mapping of the "QuadSPI" register resource */
+	void __iomem *ahb_addr;		/* mapping of the "QuadSPI-memory" resource */
+	u32 memmap_phy;			/* start address of the "QuadSPI-memory" resource */
+	struct clk *clk, *clk_en;	/* "qspi" and "qspi_en" clocks */
+	struct device *dev;		/* device of the platform_device */
+	struct completion c;		/* completed by the IRQ handler, awaited in fsl_qspi_do_op() */
+	const struct fsl_qspi_devtype_data *devtype_data;
+	bool big_endian;		/* "big-endian" DT property: use the *be register accessors */
+	struct mutex lock;		/* held for the whole of fsl_qspi_exec_op() */
+	struct pm_qos_request pm_qos_req; /* only used with QUADSPI_QUIRK_TKT245618 */
+	int selected;			/* chip select last set up by fsl_qspi_select_mem(), -1 if none */
+};
+
+/* Non-zero when QUADSPI_QUIRK_SWAP_ENDIAN is set for this controller. */
+static inline int needs_swap_endian(struct fsl_qspi *q)
+{
+	const unsigned int quirks = q->devtype_data->quirks;
+
+	return quirks & QUADSPI_QUIRK_SWAP_ENDIAN;
+}
+
+/* Non-zero when QUADSPI_QUIRK_4X_INT_CLK is set for this controller. */
+static inline int needs_4x_clock(struct fsl_qspi *q)
+{
+	const unsigned int quirks = q->devtype_data->quirks;
+
+	return quirks & QUADSPI_QUIRK_4X_INT_CLK;
+}
+
+/* Non-zero when QUADSPI_QUIRK_TKT253890 is set for this controller. */
+static inline int needs_fill_txfifo(struct fsl_qspi *q)
+{
+	const unsigned int quirks = q->devtype_data->quirks;
+
+	return quirks & QUADSPI_QUIRK_TKT253890;
+}
+
+/* Non-zero when QUADSPI_QUIRK_TKT245618 is set for this controller. */
+static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
+{
+	const unsigned int quirks = q->devtype_data->quirks;
+
+	return quirks & QUADSPI_QUIRK_TKT245618;
+}
+
+/*
+ * An IC bug makes it necessary to rearrange the 32-bit data on some
+ * controllers. Later chips, such as IMX6SLX, have fixed this bug, so
+ * the byte swap is applied only when the swap-endian quirk is set.
+ */
+static inline u32 fsl_qspi_endian_xchg(struct fsl_qspi *q, u32 a)
+{
+	if (!needs_swap_endian(q))
+		return a;
+
+	return __swab32(a);
+}
+
+/*
+ * R/W functions for big- or little-endian registers:
+ * The QSPI controller's endianness is independent of
+ * the CPU core's endianness. So far, although the CPU
+ * core is little-endian the QSPI controller can use
+ * big-endian or little-endian.
+ */
+static void qspi_writel(struct fsl_qspi *q, u32 val, void __iomem *addr)
+{
+	/* little-endian register layout is the common case */
+	if (!q->big_endian) {
+		iowrite32(val, addr);
+		return;
+	}
+
+	iowrite32be(val, addr);
+}
+
+static u32 qspi_readl(struct fsl_qspi *q, void __iomem *addr)
+{
+	/* pick the accessor matching the controller's register endianness */
+	return q->big_endian ? ioread32be(addr) : ioread32(addr);
+}
+
+/*
+ * Interrupt handler: clear the pending flags in QUADSPI_FR and complete
+ * q->c when QUADSPI_FR_TFF_MASK was among them.
+ */
+static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id)
+{
+	struct fsl_qspi *q = dev_id;
+	void __iomem *fr = q->iobase + QUADSPI_FR;
+	u32 flags;
+
+	/* clear interrupt by writing back the flags that were read */
+	flags = qspi_readl(q, fr);
+	qspi_writel(q, flags, fr);
+
+	if (flags & QUADSPI_FR_TFF_MASK)
+		complete(&q->c);
+
+	dev_dbg(q->dev, "QUADSPI_FR : 0x%.8x:0x%.8x\n", 0, flags);
+
+	return IRQ_HANDLED;
+}
+
+/* Return 0 for a bus width of 1, 2 or 4 lines, -ENOTSUPP otherwise. */
+static int fsl_qspi_check_buswidth(struct fsl_qspi *q, u8 width)
+{
+	if (width == 1 || width == 2 || width == 4)
+		return 0;
+
+	return -ENOTSUPP;
+}
+
+/*
+ * Tell the spi-mem layer whether @op can be executed by this controller.
+ *
+ * Returns true only if every phase that is present uses a bus width of
+ * 1, 2 or 4, the operation fits into a single LUT entry, the dummy phase
+ * needs at most 64 clock cycles and the data length respects the TX
+ * FIFO, RX FIFO and AHB buffer limits of the controller variant.
+ */
+static bool fsl_qspi_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	ret = fsl_qspi_check_buswidth(q, op->cmd.buswidth);
+
+	if (op->addr.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->addr.buswidth);
+
+	if (op->dummy.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->dummy.buswidth);
+
+	if (op->data.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->data.buswidth);
+
+	if (ret)
+		return false;
+
+	/*
+	 * The number of instructions needed for the op, needs
+	 * to fit into a single LUT entry.
+	 */
+	if (op->addr.nbytes +
+	   (op->dummy.nbytes ? 1:0) +
+	   (op->data.nbytes ? 1:0) > 6)
+		return false;
+
+	/*
+	 * Max 64 dummy clock cycles supported. Only evaluate this when a
+	 * dummy phase is present: without one, dummy.buswidth may be 0 and
+	 * the division would be a division by zero. With one, the width
+	 * has already been validated above.
+	 */
+	if (op->dummy.nbytes &&
+	    op->dummy.nbytes * 8 / op->dummy.buswidth > 64)
+		return false;
+
+	/* Max data length, check controller limits and alignment */
+	if (op->data.dir == SPI_MEM_DATA_IN &&
+	    (op->data.nbytes > q->devtype_data->ahb_buf_size ||
+	     (op->data.nbytes > q->devtype_data->rxfifo - 4 &&
+	      !IS_ALIGNED(op->data.nbytes, 8))))
+		return false;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT &&
+	    op->data.nbytes > q->devtype_data->txfifo)
+		return false;
+
+	return true;
+}
+
+/*
+ * Translate @op into LUT instructions and write them to the LUT entry
+ * used by the driver (SEQID_LUT).
+ *
+ * The sequence is: the command opcode, one LUT_MODE instruction per
+ * address byte (most significant byte first), an optional LUT_DUMMY,
+ * an optional read or write instruction and a final LUT_STOP.
+ */
+static void fsl_qspi_prepare_lut(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u32 seq[4] = {};
+	int slot = 0, n;
+
+	seq[0] |= LUT_DEF(slot, LUT_CMD, LUT_PAD(op->cmd.buswidth),
+			  op->cmd.opcode);
+	slot++;
+
+	/*
+	 * For some unknown reason, using LUT_ADDR doesn't work in some
+	 * cases (at least with only one byte long addresses), so
+	 * let's use LUT_MODE to write the address bytes one by one
+	 */
+	for (n = op->addr.nbytes; n > 0; n--) {
+		u8 addrbyte = op->addr.val >> (8 * (n - 1));
+
+		seq[slot / 2] |= LUT_DEF(slot, LUT_MODE,
+					 LUT_PAD(op->addr.buswidth),
+					 addrbyte);
+		slot++;
+	}
+
+	if (op->dummy.nbytes) {
+		/* the operand of LUT_DUMMY is a number of clock cycles */
+		int cycles = op->dummy.nbytes * 8 / op->dummy.buswidth;
+
+		seq[slot / 2] |= LUT_DEF(slot, LUT_DUMMY,
+					 LUT_PAD(op->dummy.buswidth),
+					 cycles);
+		slot++;
+	}
+
+	if (op->data.nbytes) {
+		int ins;
+
+		if (op->data.dir == SPI_MEM_DATA_IN)
+			ins = LUT_FSL_READ;
+		else
+			ins = LUT_FSL_WRITE;
+
+		seq[slot / 2] |= LUT_DEF(slot, ins,
+					 LUT_PAD(op->data.buswidth), 0);
+		slot++;
+	}
+
+	seq[slot / 2] |= LUT_DEF(slot, LUT_STOP, 0, 0);
+
+	/* unlock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, base + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_UNLOCK, base + QUADSPI_LCKCR);
+
+	/* fill LUT */
+	for (n = 0; n < ARRAY_SIZE(seq); n++)
+		qspi_writel(q, seq[n], base + QUADSPI_LUT_REG(n));
+
+	/* lock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, base + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_LOCK, base + QUADSPI_LCKCR);
+}
+
+/*
+ * Prepare and enable both controller clocks and, for controllers with
+ * the TKT245618 quirk, add the PM QoS request.
+ *
+ * Returns 0 on success or the error of the failing clk call; in that
+ * case no clock is left enabled by this function.
+ */
+static int fsl_qspi_clk_prep_enable(struct fsl_qspi *q)
+{
+	int ret = clk_prepare_enable(q->clk_en);
+
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(q->clk);
+	if (ret)
+		goto err_disable_en;
+
+	if (needs_wakeup_wait_mode(q))
+		pm_qos_add_request(&q->pm_qos_req, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	return 0;
+
+err_disable_en:
+	clk_disable_unprepare(q->clk_en);
+	return ret;
+}
+
+/*
+ * Counterpart of fsl_qspi_clk_prep_enable(): drop the PM QoS request
+ * (TKT245618 quirk only), then disable and unprepare both clocks.
+ */
+static void fsl_qspi_clk_disable_unprep(struct fsl_qspi *q)
+{
+	const bool has_qos_req = needs_wakeup_wait_mode(q);
+
+	if (has_qos_req)
+		pm_qos_remove_request(&q->pm_qos_req);
+
+	clk_disable_unprepare(q->clk);
+	clk_disable_unprepare(q->clk_en);
+}
+
+/*
+ * Route accesses to the flash behind the chip select of @spi and switch
+ * the controller clock to that device's maximum frequency (times four
+ * on controllers with the 4x clock quirk).
+ *
+ * Nothing is done if that chip select is already the selected one. If
+ * the clock cannot be reconfigured, the error is logged and q->selected
+ * is left at -1, so the next operation runs the selection again instead
+ * of trusting a configuration that was only partly applied.
+ */
+static void fsl_qspi_select_mem(struct fsl_qspi *q, struct spi_device *spi)
+{
+	unsigned long rate = spi->max_speed_hz;
+	int ret, err, i;
+	u32 map_addr;
+
+	if (q->selected == spi->chip_select)
+		return;
+
+	/*
+	 * In HW there can be a maximum of four chips on two buses with
+	 * two chip selects on each bus. We use four chip selects in SW
+	 * to differentiate between the four chips.
+	 * We use the SFA1AD, SFA2AD, SFB1AD, SFB2AD registers to select
+	 * the chip we want to access.
+	 */
+	for (i = 0; i < 4; i++) {
+		if (i < spi->chip_select)
+			map_addr = q->memmap_phy;
+		else
+			map_addr = q->memmap_phy +
+				   2 * q->devtype_data->ahb_buf_size;
+
+		qspi_writel(q, map_addr, q->iobase + QUADSPI_SFA1AD + (i * 4));
+	}
+
+	/*
+	 * The address map now belongs to the new chip select, so the
+	 * previously recorded selection is stale whatever happens below.
+	 */
+	q->selected = -1;
+
+	if (needs_4x_clock(q))
+		rate *= 4;
+
+	fsl_qspi_clk_disable_unprep(q);
+
+	ret = clk_set_rate(q->clk, rate);
+	if (ret)
+		dev_err(q->dev, "failed to set clock rate to %lu Hz: %d\n",
+			rate, ret);
+
+	/*
+	 * Re-enable the clocks even if the rate change failed, so that the
+	 * enable/disable calls stay balanced and the registers remain
+	 * accessible.
+	 */
+	err = fsl_qspi_clk_prep_enable(q);
+	if (err) {
+		dev_err(q->dev, "failed to re-enable the clocks: %d\n", err);
+		return;
+	}
+
+	if (ret)
+		return;
+
+	q->selected = spi->chip_select;
+}
+
+/*
+ * Read op->data.nbytes bytes for @op through the memory-mapped AHB
+ * window into op->data.buf.in.
+ */
+static void fsl_qspi_read_ahb(struct fsl_qspi *q, const struct spi_mem_op *op)
+{
+	static int seq;
+	void __iomem *src;
+
+	/*
+	 * We want to avoid needing to invalidate the cache by issuing
+	 * a reset to the AHB and Serial Flash domain, as this needs
+	 * time. So we change the address on each read to trigger an
+	 * actual read operation on the flash. The actual address for
+	 * the flash memory is set by programming the LUT.
+	 */
+	src = q->ahb_addr + (seq * q->devtype_data->ahb_buf_size);
+	memcpy_fromio(op->data.buf.in, src, op->data.nbytes);
+
+	/* alternate between the two window offsets on every call */
+	seq = !seq;
+}
+
+/*
+ * Push the outgoing data of @op into the TX buffer register, one 32-bit
+ * word at a time. A trailing partial word is zero-padded. On
+ * controllers with the TKT253890 quirk, zero words are appended until
+ * at least 16 bytes have been written.
+ */
+static void fsl_qspi_fill_txfifo(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *tbdr = q->iobase + QUADSPI_TBDR;
+	const u8 *src = op->data.buf.out;
+	unsigned int left = op->data.nbytes;
+	unsigned int written = 0;
+
+	while (left) {
+		unsigned int chunk = min_t(unsigned int, left, 4);
+		u32 word = 0;
+
+		memcpy(&word, src, chunk);
+		qspi_writel(q, fsl_qspi_endian_xchg(q, word), tbdr);
+
+		src += chunk;
+		left -= chunk;
+		written += 4;
+	}
+
+	if (!needs_fill_txfifo(q))
+		return;
+
+	while (written < 16) {
+		qspi_writel(q, 0, tbdr);
+		written += 4;
+	}
+}
+
+/*
+ * Copy the incoming data of @op from the RX buffer registers into
+ * op->data.buf.in, one 32-bit word at a time. Only the bytes still
+ * needed are copied from the last word.
+ */
+static void fsl_qspi_read_rxfifo(struct fsl_qspi *q,
+			  const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u8 *dst = op->data.buf.in;
+	unsigned int left = op->data.nbytes;
+	unsigned int word_idx = 0;
+
+	while (left) {
+		unsigned int chunk = min_t(unsigned int, left, 4);
+		u32 word = qspi_readl(q, base + QUADSPI_RBDR(word_idx));
+
+		word = fsl_qspi_endian_xchg(q, word);
+		memcpy(dst, &word, chunk);
+
+		dst += chunk;
+		left -= chunk;
+		word_idx++;
+	}
+}
+
+/*
+ * Start the IP command for @op and wait for the interrupt handler to
+ * signal its completion. For read operations the received data is then
+ * copied out of the RX buffer.
+ *
+ * Returns 0 on success or -ETIMEDOUT if no completion was signalled
+ * within one second.
+ */
+static int fsl_qspi_do_op(struct fsl_qspi *q, const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u32 ipcr;
+
+	init_completion(&q->c);
+
+	/*
+	 * Always start the sequence at the same index since we update
+	 * the LUT at each exec_op() call. And also specify the DATA
+	 * length, since it has not been specified in the LUT.
+	 */
+	ipcr = op->data.nbytes | (SEQID_LUT << QUADSPI_IPCR_SEQID_SHIFT);
+	qspi_writel(q, ipcr, base + QUADSPI_IPCR);
+
+	/* Wait for the interrupt. */
+	if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000)))
+		return -ETIMEDOUT;
+
+	if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
+		fsl_qspi_read_rxfifo(q, op);
+
+	return 0;
+}
+
+/*
+ * Execute one spi-mem operation.
+ *
+ * The whole operation runs under q->lock. After the controller has
+ * become idle, the target chip is selected, the FIFOs and buffer
+ * pointers are cleared and the LUT is programmed for @op. Reads larger
+ * than (rxfifo - 4) bytes go through the AHB window, everything else is
+ * issued as an IP command.
+ *
+ * Returns 0 on success or -ETIMEDOUT if the controller does not become
+ * idle or the IP command does not complete in time.
+ */
+static int fsl_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	void __iomem *base = q->iobase;
+	unsigned long deadline;
+	int err = 0;
+
+	mutex_lock(&q->lock);
+
+	/*
+	 * Wait for the controller being ready, but give up after one
+	 * second instead of spinning forever with the lock held.
+	 */
+	deadline = jiffies + msecs_to_jiffies(1000);
+	for (;;) {
+		u32 status;
+
+		status = qspi_readl(q, base + QUADSPI_SR);
+		if (!(status &
+		      (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)))
+			break;
+
+		/* the status was sampled before the deadline is checked */
+		if (time_after(jiffies, deadline)) {
+			dev_err(q->dev, "The controller is stuck busy, 0x%x\n",
+				status);
+			err = -ETIMEDOUT;
+			goto out_unlock;
+		}
+
+		udelay(1);
+		dev_dbg(q->dev, "The controller is busy, 0x%x\n", status);
+	}
+
+	fsl_qspi_select_mem(q, mem->spi);
+
+	qspi_writel(q, q->memmap_phy, base + QUADSPI_SFAR);
+
+	qspi_writel(q,
+		    qspi_readl(q, base + QUADSPI_MCR) |
+		    QUADSPI_MCR_CLR_RXF_MASK | QUADSPI_MCR_CLR_TXF_MASK,
+		    base + QUADSPI_MCR);
+
+	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC | QUADSPI_SPTRCLR_IPPTRC,
+		    base + QUADSPI_SPTRCLR);
+
+	fsl_qspi_prepare_lut(q, op);
+
+	/*
+	 * If we have large chunks of data, we read them through the AHB bus
+	 * by accessing the mapped memory. In all other cases we use
+	 * IP commands to access the flash.
+	 */
+	if (op->data.nbytes > (q->devtype_data->rxfifo - 4) &&
+	    op->data.dir == SPI_MEM_DATA_IN) {
+		fsl_qspi_read_ahb(q, op);
+	} else {
+		qspi_writel(q,
+			    QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
+			    base + QUADSPI_RBCT);
+
+		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+			fsl_qspi_fill_txfifo(q, op);
+
+		err = fsl_qspi_do_op(q, op);
+	}
+
+out_unlock:
+	mutex_unlock(&q->lock);
+
+	return err;
+}
+
+/*
+ * Shrink the data length of @op to what the controller can transfer in
+ * one operation: writes are capped at the TX FIFO size, reads at the
+ * AHB buffer size, and reads larger than (rxfifo - 4) bytes are rounded
+ * down to a multiple of 8. Always returns 0.
+ */
+static int fsl_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	const struct fsl_qspi_devtype_data *data = q->devtype_data;
+	unsigned int len = op->data.nbytes;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		if (len > data->txfifo)
+			len = data->txfifo;
+	} else if (len > data->ahb_buf_size) {
+		len = data->ahb_buf_size;
+	} else if (len > (data->rxfifo - 4)) {
+		len = ALIGN_DOWN(len, 8);
+	}
+
+	op->data.nbytes = len;
+
+	return 0;
+}
+
+/*
+ * Bring the controller into the driver's default configuration: set the
+ * clock to 66 MHz, reset the module, program the sampling and AHB
+ * buffer registers, re-enable the module and enable the interrupt.
+ *
+ * Expects the clocks to be enabled on entry. Returns 0 on success or
+ * the error of the failing clk call; in that case the clocks are left
+ * disabled.
+ */
+static int fsl_qspi_default_setup(struct fsl_qspi *q)
+{
+	void __iomem *base = q->iobase;
+	u32 reg;
+	int ret;
+
+	/* disable and unprepare clock to avoid glitch pass to controller */
+	fsl_qspi_clk_disable_unprep(q);
+
+	/* the default frequency, we will change it later if necessary. */
+	ret = clk_set_rate(q->clk, 66000000);
+	if (ret)
+		return ret;
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret)
+		return ret;
+
+	/* Reset the module */
+	qspi_writel(q, QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
+		base + QUADSPI_MCR);
+	udelay(1);
+
+	/* Disable the module */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
+			base + QUADSPI_MCR);
+
+	/* clear the FSDLY, FSPHS, HSENA and DDRSMP fields of SMPR */
+	reg = qspi_readl(q, base + QUADSPI_SMPR);
+	qspi_writel(q, reg & ~(QUADSPI_SMPR_FSDLY_MASK
+			| QUADSPI_SMPR_FSPHS_MASK
+			| QUADSPI_SMPR_HSENA_MASK
+			| QUADSPI_SMPR_DDRSMP_MASK), base + QUADSPI_SMPR);
+
+	/* We only use the buffer3 for AHB read */
+	qspi_writel(q, 0, base + QUADSPI_BUF0IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
+
+	/* use the driver's LUT entry (SEQID_LUT) as the BFGENCR sequence ID */
+	qspi_writel(q, SEQID_LUT << QUADSPI_BFGENCR_SEQID_SHIFT,
+		    q->iobase + QUADSPI_BFGENCR);
+	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, base + QUADSPI_RBCT);
+	/* the ADATSZ field is written as ahb_buf_size / 8 */
+	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
+		    ((q->devtype_data->ahb_buf_size / 8) <<
+		    QUADSPI_BUF3CR_ADATSZ_SHIFT),
+		    base + QUADSPI_BUF3CR);
+
+	/* no chip selected: makes fsl_qspi_select_mem() reprogram on the next op */
+	q->selected = -1;
+
+	/* Enable the module */
+	qspi_writel(q, QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
+			base + QUADSPI_MCR);
+
+	/* clear all interrupt status */
+	qspi_writel(q, 0xffffffff, q->iobase + QUADSPI_FR);
+
+	/* enable the interrupt */
+	qspi_writel(q, QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
+
+	return 0;
+}
+
+/*
+ * Return the name to use for the flash behind @mem.
+ *
+ * In order to keep mtdparts compatible with the old MTD driver at
+ * mtd/spi-nor/fsl-quadspi.c, the name is derived from the
+ * platform_device of the controller: the plain device name if the
+ * controller node has exactly one available child, otherwise the device
+ * name with "-<chip select>" appended.
+ */
+static const char *fsl_qspi_get_name(struct spi_mem *mem)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct device *dev = &mem->spi->dev;
+	const char *name;
+
+	if (of_get_available_child_count(q->dev->of_node) == 1)
+		return dev_name(q->dev);
+
+	name = devm_kasprintf(dev, GFP_KERNEL,
+			      "%s-%d", dev_name(q->dev),
+			      mem->spi->chip_select);
+	if (name)
+		return name;
+
+	/* allocation failed: fall back to the controller's device name */
+	dev_err(dev, "failed to get memory for custom flash name\n");
+
+	return dev_name(q->dev);
+}
+
+/* spi-mem callbacks of this driver, installed as ctlr->mem_ops in probe */
+static const struct spi_controller_mem_ops fsl_qspi_mem_ops = {
+	.adjust_op_size = fsl_qspi_adjust_op_size,
+	.supports_op = fsl_qspi_supports_op,
+	.exec_op = fsl_qspi_exec_op,
+	.get_name = fsl_qspi_get_name,
+};
+
+/*
+ * Probe one QuadSPI controller: allocate the SPI controller, map the
+ * register and memory windows, get and enable the clocks, request the
+ * interrupt, apply the default setup and register the controller.
+ *
+ * The controller is registered through devres, so it is unregistered
+ * automatically when the driver is unbound.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int fsl_qspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	struct fsl_qspi *q;
+	int ret;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*q));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD |
+			  SPI_TX_DUAL | SPI_TX_QUAD;
+
+	q = spi_controller_get_devdata(ctlr);
+	q->dev = dev;
+	q->devtype_data = of_device_get_match_data(dev);
+	if (!q->devtype_data) {
+		ret = -ENODEV;
+		goto err_put_ctrl;
+	}
+
+	platform_set_drvdata(pdev, q);
+
+	/* find the resources */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "QuadSPI");
+	q->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->iobase)) {
+		ret = PTR_ERR(q->iobase);
+		goto err_put_ctrl;
+	}
+
+	q->big_endian = of_property_read_bool(np, "big-endian");
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					"QuadSPI-memory");
+	q->ahb_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->ahb_addr)) {
+		ret = PTR_ERR(q->ahb_addr);
+		goto err_put_ctrl;
+	}
+
+	q->memmap_phy = res->start;
+
+	/* find the clocks */
+	q->clk_en = devm_clk_get(dev, "qspi_en");
+	if (IS_ERR(q->clk_en)) {
+		ret = PTR_ERR(q->clk_en);
+		goto err_put_ctrl;
+	}
+
+	q->clk = devm_clk_get(dev, "qspi");
+	if (IS_ERR(q->clk)) {
+		ret = PTR_ERR(q->clk);
+		goto err_put_ctrl;
+	}
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		goto err_put_ctrl;
+	}
+
+	/*
+	 * Initialize everything the interrupt handler and exec_op() rely
+	 * on before the handler can run for the first time.
+	 */
+	init_completion(&q->c);
+	mutex_init(&q->lock);
+
+	/* find the irq */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "failed to get the irq: %d\n", ret);
+		goto err_destroy_mutex;
+	}
+
+	ret = devm_request_irq(dev, ret,
+			fsl_qspi_irq_handler, 0, pdev->name, q);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto err_destroy_mutex;
+	}
+
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = 4;
+	ctlr->mem_ops = &fsl_qspi_mem_ops;
+
+	ret = fsl_qspi_default_setup(q);
+	if (ret) {
+		dev_err(dev, "failed to set up the controller: %d\n", ret);
+		/*
+		 * fsl_qspi_default_setup() leaves the clocks disabled when
+		 * it fails, so they must not be disabled a second time.
+		 */
+		mutex_destroy(&q->lock);
+		goto err_put_ctrl;
+	}
+
+	ctlr->dev.of_node = np;
+
+	ret = devm_spi_register_controller(dev, ctlr);
+	if (ret)
+		goto err_destroy_mutex;
+
+	return 0;
+
+err_destroy_mutex:
+	mutex_destroy(&q->lock);
+
+	fsl_qspi_clk_disable_unprep(q);
+
+err_put_ctrl:
+	spi_controller_put(ctlr);
+
+	dev_err(dev, "Freescale QuadSPI probe failed\n");
+	return ret;
+}
+
+/*
+ * Unbind the driver: disable the module and its interrupts, then
+ * release the clocks and the mutex.
+ *
+ * The register and AHB mappings were created with
+ * devm_ioremap_resource() and are released by devres, so they must not
+ * be unmapped here.
+ */
+static int fsl_qspi_remove(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+
+	/* disable the hardware */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER);
+
+	fsl_qspi_clk_disable_unprep(q);
+
+	mutex_destroy(&q->lock);
+
+	return 0;
+}
+
+/* Suspend callback: nothing is saved or shut down here; always returns 0. */
+static int fsl_qspi_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	return 0;
+}
+
+/*
+ * Resume callback: reapply the default controller setup.
+ *
+ * Returns 0 on success or the error reported by
+ * fsl_qspi_default_setup(), so that a failed resume is not reported as
+ * success.
+ */
+static int fsl_qspi_resume(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = fsl_qspi_default_setup(q);
+	if (ret)
+		dev_err(q->dev, "failed to restore the controller setup: %d\n",
+			ret);
+
+	return ret;
+}
+
+static const struct of_device_id fsl_qspi_dt_ids[] = {
+	{ .compatible = "fsl,vf610-qspi", .data = &vybrid_data, },
+	{ .compatible = "fsl,imx6sx-qspi", .data = &imx6sx_data, },
+	{ .compatible = "fsl,imx7d-qspi", .data = &imx7d_data, },
+	{ .compatible = "fsl,imx6ul-qspi", .data = &imx6ul_data, },
+	{ .compatible = "fsl,ls1021a-qspi", .data = &ls1021a_data, },
+	{ .compatible = "fsl,ls2080a-qspi", .data = &ls2080a_data, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_qspi_dt_ids);
+
+static struct platform_driver fsl_qspi_driver = {
+	.driver = {
+		.name	= "fsl-quadspi",
+		.of_match_table = fsl_qspi_dt_ids,
+	},
+	.probe          = fsl_qspi_probe,
+	.remove		= fsl_qspi_remove,
+	.suspend	= fsl_qspi_suspend,
+	.resume		= fsl_qspi_resume,
+};
+module_platform_driver(fsl_qspi_driver);
+
+MODULE_DESCRIPTION("Freescale QuadSPI Controller Driver");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_AUTHOR("Boris Brezillion <boris.brezillon@bootlin.com>");
+MODULE_AUTHOR("Frieder Schrempf <frieder.schrempf@exceet.de>");
+MODULE_LICENSE("GPL v2");