diff mbox

[1/2] fsl-quadspi: fix QUAD read, add NORMAL, DUAL and FAST reads

Message ID 20160927055957.427-1-albert.aribaud@3adev.fr
State Rejected
Delegated to: Cyrille Pitchen
Headers show

Commit Message

Albert ARIBAUD (3ADEV) Sept. 27, 2016, 5:59 a.m. UTC
Fix QUAD read LUT sequence missing JMP_ON_CS which caused
read corruptions with non-1K-size reads on BK4R1 machine.

Add NORMAL, DUAL and FAST read sequences.

Introduce spi-bus-width property for bus subnodes, to
specify per-bus capability to use NORMAL, FAST, DUAL,
and/or QUAD reads.

Also:

Simplify ioremap/memcpy usage.

Make controller clock frequency equal to the lowest of
subnode max frequencies instead of the last one.

Limit Vybrid to use only half its RX fifo, using more will
cause corrupt reads.

Signed-off-by: Albert ARIBAUD (3ADEV) <albert.aribaud@3adev.fr>
---
 drivers/mtd/spi-nor/fsl-quadspi.c | 204 ++++++++++++++++++++++++++++----------
 1 file changed, 152 insertions(+), 52 deletions(-)

Comments

Han Xu Sept. 28, 2016, 8:06 p.m. UTC | #1
On Tue, Sep 27, 2016 at 07:59:56AM +0200, Albert ARIBAUD (3ADEV) wrote:
> Fix QUAD read LUT sequence missing JMP_ON_CS which caused
> read corruptions with non-1K-size reads on BK4R1 machine.
> 
> Add NORMAL, DUAL and FAST read sequences.
> 
> Introduce spi-bus-width property for bus subnodes, to
> specify per-bus capability to use NORMAL, FAST, DUAL,
> and/or QUAD reads.
> 
> Also:
> 
> Simplify ioremap/memcpy usage.
> 
> Make controller clock frequency equal to the lowest of
> subnode max frequencies instead of the last one.
> 
> Limit Vybrid to use only half its RX fifo, using more will
> cause corrupt reads.
> 

Too much changes in one patch, need to split to several patches.

> Signed-off-by: Albert ARIBAUD (3ADEV) <albert.aribaud@3adev.fr>
> ---
>  drivers/mtd/spi-nor/fsl-quadspi.c | 204 ++++++++++++++++++++++++++++----------
>  1 file changed, 152 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
> index 5c82e4e..9811cf2 100644
> --- a/drivers/mtd/spi-nor/fsl-quadspi.c
> +++ b/drivers/mtd/spi-nor/fsl-quadspi.c
> @@ -41,6 +41,8 @@
>  #define QUADSPI_QUIRK_TKT253890		(1 << 2)
>  /* Controller cannot wake up from wait mode, TKT245618 */
>  #define QUADSPI_QUIRK_TKT245618         (1 << 3)
> +/* Controller can only read half a rx fifo through AHB */
> +#define QUADSPI_QUIRK_AHB_READ_HALF_FIFO  (1 << 4)
>  
>  /* The registers */
>  #define QUADSPI_MCR			0x00
> @@ -117,6 +119,10 @@
>  #define QUADSPI_FR			0x160
>  #define QUADSPI_FR_TFF_MASK		0x1
>  
> +#define QUADSPI_SPTRCLR			0x15c
> +#define QUADSPI_SPTRCLR_BFPTRC_SHIFT		0
> +#define QUADSPI_SPTRCLR_BFPTRC_MASK		(0x1 << QUADSPI_SPTRCLR_BFPTRC_SHIFT)
> +

offset 0x15c is SR register for i.MX, not sure where does SPTRCLR come from

>  #define QUADSPI_SFA1AD			0x180
>  #define QUADSPI_SFA2AD			0x184
>  #define QUADSPI_SFB1AD			0x188
> @@ -193,7 +199,7 @@
>  #define QUADSPI_LUT_NUM		64
>  
>  /* SEQID -- we can have 16 seqids at most. */
> -#define SEQID_QUAD_READ		0
> +#define SEQID_READ		0
>  #define SEQID_WREN		1
>  #define SEQID_WRDI		2
>  #define SEQID_RDSR		3
> @@ -205,6 +211,9 @@
>  #define SEQID_RDCR		9
>  #define SEQID_EN4B		10
>  #define SEQID_BRWR		11
> +#define SEQID_FAST_READ		12
> +#define SEQID_DUAL_READ		13
> +#define SEQID_QUAD_READ		14
>  
>  #define QUADSPI_MIN_IOMAP SZ_4M
>  
> @@ -229,7 +238,8 @@ static struct fsl_qspi_devtype_data vybrid_data = {
>  	.rxfifo = 128,
>  	.txfifo = 64,
>  	.ahb_buf_size = 1024,
> -	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN,
> +	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN
> +		       | QUADSPI_QUIRK_AHB_READ_HALF_FIFO,
>  };
>  
>  static struct fsl_qspi_devtype_data imx6sx_data = {
> @@ -309,6 +319,11 @@ static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
>  	return q->devtype_data->driver_data & QUADSPI_QUIRK_TKT245618;
>  }
>  
> +static inline int needs_half_fifo(struct fsl_qspi *q)
> +{
> +	return q->devtype_data->driver_data & QUADSPI_QUIRK_AHB_READ_HALF_FIFO;
> +}
> +
>  /*
>   * R/W functions for big- or little-endian registers:
>   * The qSPI controller's endian is independent of the CPU core's endian.
> @@ -373,8 +388,20 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	void __iomem *base = q->iobase;
>  	int rxfifo = q->devtype_data->rxfifo;
>  	u32 lut_base;
> -	u8 cmd, addrlen, dummy;
> +	u8 cmd, addrlen;
>  	int i;
> +	/* use 8 dummy cycles for all fast operations */
> +	const int dummy = 8;
> +
> +	/* use only half fifo if controller needs that */
> +	if (needs_half_fifo(q))
> +		rxfifo /= 2;
> +
> +    /* use 24-bit addresses for up to 16MB, 32-bit above 16MB */
> +	if (q->nor_size <= SZ_16M)
> +		addrlen = ADDR24BIT;
> +	else
> +		addrlen = ADDR32BIT;

A better patch fetch info from nor structure, refer to
https://patchwork.ozlabs.org/patch/613429/
>  
>  	fsl_qspi_unlock_lut(q);
>  
> @@ -382,24 +409,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	for (i = 0; i < QUADSPI_LUT_NUM; i++)
>  		qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
>  
> -	/* Quad Read */
> -	lut_base = SEQID_QUAD_READ * 4;
> -
> -	if (q->nor_size <= SZ_16M) {
> -		cmd = SPINOR_OP_READ_1_1_4;
> -		addrlen = ADDR24BIT;
> -		dummy = 8;
> -	} else {
> -		/* use the 4-byte address */
> -		cmd = SPINOR_OP_READ_1_1_4;
> -		addrlen = ADDR32BIT;
> -		dummy = 8;
> -	}
> -
> -	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> -			base + QUADSPI_LUT(lut_base));
> -	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> -			base + QUADSPI_LUT(lut_base + 1));
> +	/* Read */
> +	lut_base = SEQID_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(FSL_READ, PAD1, rxfifo) | LUT1(JMP_ON_CS, PAD1, SEQID_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 1));
>  
>  	/* Write enable */
>  	lut_base = SEQID_WREN * 4;
> @@ -409,16 +424,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	/* Page Program */
>  	lut_base = SEQID_PP * 4;
>  
> -	if (q->nor_size <= SZ_16M) {
> -		cmd = SPINOR_OP_PP;
> -		addrlen = ADDR24BIT;
> -	} else {
> -		/* use the 4-byte address */
> -		cmd = SPINOR_OP_PP;
> -		addrlen = ADDR32BIT;
> -	}
> -
> -	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_PP) | LUT1(ADDR, PAD1, addrlen),
>  			base + QUADSPI_LUT(lut_base));
>  	qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
>  			base + QUADSPI_LUT(lut_base + 1));
> @@ -431,7 +437,6 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  
>  	/* Erase a sector */
>  	lut_base = SEQID_SE * 4;
> -
>  	cmd = q->nor[0].erase_opcode;
>  	addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
>  
> @@ -476,6 +481,33 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_BRWR),
>  			base + QUADSPI_LUT(lut_base));
>  
> +	/* Fast Read */
> +	lut_base = SEQID_FAST_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_FAST) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD1, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_FAST_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
> +	/* Dual Read */
> +	lut_base = SEQID_DUAL_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_2) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD2, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_DUAL_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
> +	/* Quad Read */
> +	lut_base = SEQID_QUAD_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_4) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_QUAD_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
>  	fsl_qspi_lock_lut(q);
>  }
>  
> @@ -483,8 +515,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
>  {
>  	switch (cmd) {
> -	case SPINOR_OP_READ_1_1_4:
> -		return SEQID_QUAD_READ;
> +	case SPINOR_OP_READ:
> +		return SEQID_READ;
>  	case SPINOR_OP_WREN:
>  		return SEQID_WREN;
>  	case SPINOR_OP_WRDI:
> @@ -507,6 +539,12 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
>  		return SEQID_EN4B;
>  	case SPINOR_OP_BRWR:
>  		return SEQID_BRWR;
> +	case SPINOR_OP_READ_FAST:
> +		return SEQID_FAST_READ;
> +	case SPINOR_OP_READ_1_1_2:
> +		return SEQID_DUAL_READ;
> +	case SPINOR_OP_READ_1_1_4:
> +		return SEQID_QUAD_READ;
>  	default:
>  		if (cmd == q->nor[0].erase_opcode)
>  			return SEQID_SE;
> @@ -676,11 +714,16 @@ static void fsl_qspi_set_map_addr(struct fsl_qspi *q)
>   * the memcpy to read the data directly. A "missed" access to the buffer
>   * causes the controller to clear the buffer, and use the sequence pointed
>   * by the QUADSPI_BFGENCR[SEQID] to initiate a read from the flash.
> + *
> + * When using two ports, the SEQID to use for one port might differ from
> + * the one to use for the other (e.g., if one port can do 4-pad reads but
> + * the other cannot). So we set up a basic mode here (SEQID_READ) and we
> + * will set up the proper SEQID for the port right before doing the AHB
> + * access(es).
>   */

It's rare to find this use case, considering the amount of lut is only 16,
please don't add two many luts in this way. I will send a patch soon for
dynamic lut change, please add these extra luts in dynamic lut list.

>  static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  {
>  	void __iomem *base = q->iobase;
> -	int seqid;
>  
>  	/* AHB configuration for access buffer 0/1/2 .*/
>  	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
> @@ -688,11 +731,13 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
>  	/*
>  	 * Set ADATSZ with the maximum AHB buffer size to improve the
> -	 * read performance.
> +	 * read performance, except when the controller should not use
> +	 * more than half its RX fifo in AHB reads, in which case read
> +	 * size is given in the LUT FSL_READ instructions.
>  	 */
>  	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
> -			((q->devtype_data->ahb_buf_size / 8)
> -			<< QUADSPI_BUF3CR_ADATSZ_SHIFT),
> +			(needs_half_fifo(q)? 0 : q->devtype_data->ahb_buf_size / 8)
> +	        << QUADSPI_BUF3CR_ADATSZ_SHIFT,
>  			base + QUADSPI_BUF3CR);
>  
>  	/* We only use the buffer3 */
> @@ -700,9 +745,8 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
>  	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
>  
> -	/* Set the default lut sequence for AHB Read. */
> -	seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
> -	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> +	/* Set the default lut sequence for AHB Read to READ. */
> +	qspi_writel(q, SEQID_READ << QUADSPI_BFGENCR_SEQID_SHIFT,
>  		q->iobase + QUADSPI_BFGENCR);
>  }
>  
> @@ -841,6 +885,7 @@ static int fsl_qspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
>  		return ret;
>  
>  	fsl_qspi_read_data(q, len, buf);
> +
>  	return 0;
>  }
>  
> @@ -887,11 +932,27 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  {
>  	struct fsl_qspi *q = nor->priv;
>  	u8 cmd = nor->read_opcode;
> +	uint32_t seqid;
> +	size_t qlen = q->nor_size * 4;
> +	int nor_idx = nor - q->nor;
> +	size_t nor_ofs = q->nor_size * nor_idx;
> +
> +	/* Set the actual lut sequence for AHB Read from the considered nor. */
> +	seqid = fsl_qspi_get_seqid(q, nor->read_opcode);
> +	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> +		q->iobase + QUADSPI_BFGENCR);
> +	/* Reset the AHB sequence pointer */
> +	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC_MASK, q->iobase + QUADSPI_SPTRCLR);
> +    /* make sure the Rx buffer is read through AHB, not IP */
> +	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, q->iobase + QUADSPI_RBCT);
> +
> +	/* set the chip address for READID */
> +	fsl_qspi_set_base_addr(q, q->nor);
>  
>  	/* if necessary,ioremap buffer before AHB read, */
>  	if (!q->ahb_addr) {
> -		q->memmap_offs = q->chip_base_addr + from;
> -		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> +		q->memmap_offs = q->chip_base_addr;
> +		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
>  
>  		q->ahb_addr = ioremap_nocache(
>  				q->memmap_phy + q->memmap_offs,
> @@ -906,8 +967,8 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  			q->memmap_offs + q->memmap_len) {
>  		iounmap(q->ahb_addr);
>  
> -		q->memmap_offs = q->chip_base_addr + from;
> -		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> +		q->memmap_offs = q->chip_base_addr;
> +		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
>  		q->ahb_addr = ioremap_nocache(
>  				q->memmap_phy + q->memmap_offs,
>  				q->memmap_len);
> @@ -917,13 +978,11 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  		}
>  	}
>  
> -	dev_dbg(q->dev, "cmd [%x],read from %p, len:%zd\n",
> -		cmd, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> -		len);
> +	dev_dbg(q->dev, "cmd [%x], read from %p, len:%zd\n",
> +		cmd, q->ahb_addr + nor_ofs + from, len);
>  
>  	/* Read out the data directly from the AHB buffer.*/
> -	memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> -		len);
> +	memcpy(buf, q->ahb_addr + nor_ofs + from, len);

No much diff from the previous implementation
>  
>  	return len;
>  }
> @@ -980,6 +1039,11 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  	struct spi_nor *nor;
>  	struct mtd_info *mtd;
>  	int ret, i = 0;
> +	const struct of_device_id *of_id =
> +			of_match_device(fsl_qspi_dt_ids, &pdev->dev);
> +	enum read_mode readmode;
> +	unsigned int buswidth;
> +	u32 qspimaxfreq, spimaxfreq;
>  
>  	q = devm_kzalloc(dev, sizeof(*q), GFP_KERNEL);
>  	if (!q)
> @@ -1050,6 +1114,9 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  
>  	mutex_init(&q->lock);
>  
> +	/* Running MIN of all "spi-max-frequency' values present in subnodes */
> +	qspimaxfreq = INT_MAX;
> +
>  	/* iterate the subnodes. */
>  	for_each_available_child_of_node(dev->of_node, np) {
>  		/* skip the holes */
> @@ -1073,15 +1140,44 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  		nor->prepare = fsl_qspi_prep;
>  		nor->unprepare = fsl_qspi_unprep;
>  
> +		/* Update running MIN of subnode max frequencies */
>  		ret = of_property_read_u32(np, "spi-max-frequency",
> -				&q->clk_rate);
> +				&spimaxfreq);
>  		if (ret < 0)
>  			goto mutex_failed;
> +		qspimaxfreq = (qspimaxfreq > spimaxfreq) ? spimaxfreq: qspimaxfreq;
>  
>  		/* set the chip address for READID */
>  		fsl_qspi_set_base_addr(q, nor);
>  
> -		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
> +		/* scan using the proper read mode for this subnode */
> +		if (of_property_read_u32(np, "spi-bus-width", &buswidth)<0) {
> +			nor->read_opcode = SPINOR_OP_READ;
> +			readmode = SPI_NOR_NORMAL;
> +		} else {
> +			switch (buswidth) {
> +			case 4:
> +				nor->read_opcode = SPINOR_OP_READ_1_1_4;
> +				readmode = SPI_NOR_QUAD;
> +				break;
> +			case 2:
> +				nor->read_opcode = SPINOR_OP_READ_1_1_2;
> +				readmode = SPI_NOR_DUAL;
> +				break;
> +			case 1:
> +				nor->read_opcode = SPINOR_OP_READ_FAST;
> +				readmode = SPI_NOR_FAST;
> +				break;
> +			default:
> +				dev_warn(q->dev,
> +					"ignoring spi-bus-width=%d (should be 1, 2 or 4)",
> +					buswidth);
> +				nor->read_opcode = SPINOR_OP_READ;
> +				readmode = SPI_NOR_NORMAL;
> +			}
> +		}
> +
> +		ret = spi_nor_scan(nor, NULL, readmode);

Not necessary, can fetch info from nor structure
https://patchwork.ozlabs.org/patch/613429/

>  		if (ret)
>  			goto mutex_failed;
>  
> @@ -1112,6 +1208,10 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  		i++;
>  	}
>  
> +	/* If we know the lowest subnode frequency, override driver default. */
> +	if (qspimaxfreq<INT_MAX)
> +		q->clk_rate = qspimaxfreq;
> +
>  	/* finish the rest init. */
>  	ret = fsl_qspi_nor_setup_last(q);
>  	if (ret)
> -- 
> 2.9.3
> 
> 
> ______________________________________________________
> Linux MTD discussion mailing list
> http://lists.infradead.org/mailman/listinfo/linux-mtd/
Albert ARIBAUD (3ADEV) Sept. 28, 2016, 9:45 p.m. UTC | #2
Hello Han,

Le Thu, 29 Sep 2016 04:06:52 +0800, Han Xu <xhnjupt@gmail.com> a écrit :

> Too much changes in one patch, need to split to several patches.

Will do.

> > +#define QUADSPI_SPTRCLR			0x15c
> > +#define QUADSPI_SPTRCLR_BFPTRC_SHIFT		0
> > +#define QUADSPI_SPTRCLR_BFPTRC_MASK		(0x1 << QUADSPI_SPTRCLR_BFPTRC_SHIFT)
> > +  
> 
> offset 0x15c is SR register for i.MX, not sure where does SPTRCLR come from

It's a typo -- should have been 0x16c, not 0x15c. It is the Vybrid's
Sequence Pointer Clear Register. Apparently, the bad register write did
not screw thing up enough to cause any visible issue. Thanks for
pointing it out.

> > +    /* use 24-bit addresses for up to 16MB, 32-bit above 16MB */
> > +	if (q->nor_size <= SZ_16M)
> > +		addrlen = ADDR24BIT;
> > +	else
> > +		addrlen = ADDR32BIT;  
> 
> A better patch fetch info from nor structure, refer to
> https://patchwork.ozlabs.org/patch/613429/

v3 (https://patchwork.kernel.org/patch/9287005/) seems poised to go in
at some point. I can base my patches above it. I will mention the
dependency in my series' cover letter.

> > + * When using two ports, the SEQID to use for one port might differ from
> > + * the one to use for the other (e.g., if one port can do 4-pad reads but
> > + * the other cannot). So we set up a basic mode here (SEQID_READ) and we
> > + * will set up the proper SEQID for the port right before doing the AHB
> > + * access(es).
> >   */  
> 
> It's rare to find this use case, considering the amount of lut is only 16,
> please don't add two many luts in this way. I will send a patch soon for
> dynamic lut change, please add these extra luts in dynamic lut list.

I'm fine with switching to a dynamic LUT list; this matches the "two
ports with different characteristics" well. Is the patch already
available somewhere so that I can rebase over it in advance?

> >  	/* Read out the data directly from the AHB buffer.*/
> > -	memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> > -		len);
> > +	memcpy(buf, q->ahb_addr + nor_ofs + from, len);  
> 
> No much diff from the previous implementation

Still, it simplifies the memcpy source address computation.

> > +		ret = spi_nor_scan(nor, NULL, readmode);  
> 
> Not necessary, can fetch info from nor structure
> https://patchwork.ozlabs.org/patch/613429/

Ditto.

Thanks for your review!

Cordialement,
Albert ARIBAUD
3ADEV
Han Xu Sept. 29, 2016, 10:09 p.m. UTC | #3
From: Albert ARIBAUD <albert.aribaud@3adev.fr>
Sent: Wednesday, September 28, 2016 3:45 PM
To: Han Xu
Cc: linux-mtd@lists.infradead.org; devicetree@vger.kernel.org; Han Xu
Subject: Re: [PATCH 1/2] fsl-quadspi: fix QUAD read, add NORMAL, DUAL and FAST reads

Hello Han,

Le Thu, 29 Sep 2016 04:06:52 +0800, Han Xu <xhnjupt@gmail.com> a écrit :

> Too much changes in one patch, need to split to several patches.

Will do.

> > +#define QUADSPI_SPTRCLR                    0x15c
> > +#define QUADSPI_SPTRCLR_BFPTRC_SHIFT               0
> > +#define QUADSPI_SPTRCLR_BFPTRC_MASK                (0x1 << QUADSPI_SPTRCLR_BFPTRC_SHIFT)
> > +
>
> offset 0x15c is SR register for i.MX, not sure where does SPTRCLR come from

It's a typo -- should have been 0x16c, not 0x15c. It is the Vybrid's
Sequence Pointer Clear Register. Apparently, the bad register write did
not screw thing up enough to cause any visible issue. Thanks for
pointing it out.

> > +    /* use 24-bit addresses for up to 16MB, 32-bit above 16MB */
> > +   if (q->nor_size <= SZ_16M)
> > +           addrlen = ADDR24BIT;
> > +   else
> > +           addrlen = ADDR32BIT;
>
> A better patch fetch info from nor structure, refer to
> https://patchwork.ozlabs.org/patch/613429/

v3 (https://patchwork.kernel.org/patch/9287005/) seems poised to go in
at some point. I can base my patches above it. I will mention the
dependency in my series' cover letter.

> > + * When using two ports, the SEQID to use for one port might differ from
> > + * the one to use for the other (e.g., if one port can do 4-pad reads but
> > + * the other cannot). So we set up a basic mode here (SEQID_READ) and we
> > + * will set up the proper SEQID for the port right before doing the AHB
> > + * access(es).
> >   */
>
> It's rare to find this use case, considering the amount of lut is only 16,
> please don't add two many luts in this way. I will send a patch soon for
> dynamic lut change, please add these extra luts in dynamic lut list.

I'm fine with switching to a dynamic LUT list; this matches the "two
ports with different characteristics" well. Is the patch already
available somewhere so that I can rebase over it in advance?

Just sent the patch out, refer to https://patchwork.ozlabs.org/patch/676791/

> >     /* Read out the data directly from the AHB buffer.*/
> > -   memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> > -           len);
> > +   memcpy(buf, q->ahb_addr + nor_ofs + from, len);
>
> No much diff from the previous implementation

Still, it simplifies the memcpy source address computation.

> > +           ret = spi_nor_scan(nor, NULL, readmode);
>
> Not necessary, can fetch info from nor structure
> https://patchwork.ozlabs.org/patch/613429/

Ditto.

Thanks for your review!

Cordialement,
Albert ARIBAUD
3ADEV
Cyrille Pitchen Oct. 4, 2016, 12:30 p.m. UTC | #4
Hi Albert,

your patch is likely to conflict with:
https://patchwork.kernel.org/patch/9287005/

Yunhui's patch relies on the settings selected by spi-nor.c to configure the
freescale QSPI controller correctly. The freescale QSPI controller driver, like
other QSPI controller drivers, should never override settings selected in
spi_nor_scan() otherwise it will duplicate the code, will be hard to maintain
and won't work with every memory. For instance, not all memory manufacturers
use the very same op codes for Quad SPI Fast Read.
So let's spi-nor.c handle this stuff!

That's why I suggest you rebase your work on Yunhui patches.

Best regards,

Cyrille

Le 27/09/2016 à 07:59, Albert ARIBAUD (3ADEV) a écrit :
> Fix QUAD read LUT sequence missing JMP_ON_CS which caused
> read corruptions with non-1K-size reads on BK4R1 machine.
> 
> Add NORMAL, DUAL and FAST read sequences.
> 
> Introduce spi-bus-width property for bus subnodes, to
> specify per-bus capability to use NORMAL, FAST, DUAL,
> and/or QUAD reads.
> 
> Also:
> 
> Simplify ioremap/memcpy usage.
> 
> Make controller clock frequency equal to the lowest of
> subnode max frequencies instead of the last one.
> 
> Limit Vybrid to use only half its RX fifo, using more will
> cause corrupt reads.
> 
> Signed-off-by: Albert ARIBAUD (3ADEV) <albert.aribaud@3adev.fr>
> ---
>  drivers/mtd/spi-nor/fsl-quadspi.c | 204 ++++++++++++++++++++++++++++----------
>  1 file changed, 152 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
> index 5c82e4e..9811cf2 100644
> --- a/drivers/mtd/spi-nor/fsl-quadspi.c
> +++ b/drivers/mtd/spi-nor/fsl-quadspi.c
> @@ -41,6 +41,8 @@
>  #define QUADSPI_QUIRK_TKT253890		(1 << 2)
>  /* Controller cannot wake up from wait mode, TKT245618 */
>  #define QUADSPI_QUIRK_TKT245618         (1 << 3)
> +/* Controller can only read half a rx fifo through AHB */
> +#define QUADSPI_QUIRK_AHB_READ_HALF_FIFO  (1 << 4)
>  
>  /* The registers */
>  #define QUADSPI_MCR			0x00
> @@ -117,6 +119,10 @@
>  #define QUADSPI_FR			0x160
>  #define QUADSPI_FR_TFF_MASK		0x1
>  
> +#define QUADSPI_SPTRCLR			0x15c
> +#define QUADSPI_SPTRCLR_BFPTRC_SHIFT		0
> +#define QUADSPI_SPTRCLR_BFPTRC_MASK		(0x1 << QUADSPI_SPTRCLR_BFPTRC_SHIFT)
> +
>  #define QUADSPI_SFA1AD			0x180
>  #define QUADSPI_SFA2AD			0x184
>  #define QUADSPI_SFB1AD			0x188
> @@ -193,7 +199,7 @@
>  #define QUADSPI_LUT_NUM		64
>  
>  /* SEQID -- we can have 16 seqids at most. */
> -#define SEQID_QUAD_READ		0
> +#define SEQID_READ		0
>  #define SEQID_WREN		1
>  #define SEQID_WRDI		2
>  #define SEQID_RDSR		3
> @@ -205,6 +211,9 @@
>  #define SEQID_RDCR		9
>  #define SEQID_EN4B		10
>  #define SEQID_BRWR		11
> +#define SEQID_FAST_READ		12
> +#define SEQID_DUAL_READ		13
> +#define SEQID_QUAD_READ		14
>  
>  #define QUADSPI_MIN_IOMAP SZ_4M
>  
> @@ -229,7 +238,8 @@ static struct fsl_qspi_devtype_data vybrid_data = {
>  	.rxfifo = 128,
>  	.txfifo = 64,
>  	.ahb_buf_size = 1024,
> -	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN,
> +	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN
> +		       | QUADSPI_QUIRK_AHB_READ_HALF_FIFO,
>  };
>  
>  static struct fsl_qspi_devtype_data imx6sx_data = {
> @@ -309,6 +319,11 @@ static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
>  	return q->devtype_data->driver_data & QUADSPI_QUIRK_TKT245618;
>  }
>  
> +static inline int needs_half_fifo(struct fsl_qspi *q)
> +{
> +	return q->devtype_data->driver_data & QUADSPI_QUIRK_AHB_READ_HALF_FIFO;
> +}
> +
>  /*
>   * R/W functions for big- or little-endian registers:
>   * The qSPI controller's endian is independent of the CPU core's endian.
> @@ -373,8 +388,20 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	void __iomem *base = q->iobase;
>  	int rxfifo = q->devtype_data->rxfifo;
>  	u32 lut_base;
> -	u8 cmd, addrlen, dummy;
> +	u8 cmd, addrlen;
>  	int i;
> +	/* use 8 dummy cycles for all fast operations */
> +	const int dummy = 8;
> +
> +	/* use only half fifo if controller needs that */
> +	if (needs_half_fifo(q))
> +		rxfifo /= 2;
> +
> +    /* use 24-bit addresses for up to 16MB, 32-bit above 16MB */
> +	if (q->nor_size <= SZ_16M)
> +		addrlen = ADDR24BIT;
> +	else
> +		addrlen = ADDR32BIT;
>  
>  	fsl_qspi_unlock_lut(q);
>  
> @@ -382,24 +409,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	for (i = 0; i < QUADSPI_LUT_NUM; i++)
>  		qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
>  
> -	/* Quad Read */
> -	lut_base = SEQID_QUAD_READ * 4;
> -
> -	if (q->nor_size <= SZ_16M) {
> -		cmd = SPINOR_OP_READ_1_1_4;
> -		addrlen = ADDR24BIT;
> -		dummy = 8;
> -	} else {
> -		/* use the 4-byte address */
> -		cmd = SPINOR_OP_READ_1_1_4;
> -		addrlen = ADDR32BIT;
> -		dummy = 8;
> -	}
> -
> -	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> -			base + QUADSPI_LUT(lut_base));
> -	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> -			base + QUADSPI_LUT(lut_base + 1));
> +	/* Read */
> +	lut_base = SEQID_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(FSL_READ, PAD1, rxfifo) | LUT1(JMP_ON_CS, PAD1, SEQID_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 1));
>  
>  	/* Write enable */
>  	lut_base = SEQID_WREN * 4;
> @@ -409,16 +424,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	/* Page Program */
>  	lut_base = SEQID_PP * 4;
>  
> -	if (q->nor_size <= SZ_16M) {
> -		cmd = SPINOR_OP_PP;
> -		addrlen = ADDR24BIT;
> -	} else {
> -		/* use the 4-byte address */
> -		cmd = SPINOR_OP_PP;
> -		addrlen = ADDR32BIT;
> -	}
> -
> -	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_PP) | LUT1(ADDR, PAD1, addrlen),
>  			base + QUADSPI_LUT(lut_base));
>  	qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
>  			base + QUADSPI_LUT(lut_base + 1));
> @@ -431,7 +437,6 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  
>  	/* Erase a sector */
>  	lut_base = SEQID_SE * 4;
> -
>  	cmd = q->nor[0].erase_opcode;
>  	addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
>  
> @@ -476,6 +481,33 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_BRWR),
>  			base + QUADSPI_LUT(lut_base));
>  
> +	/* Fast Read */
> +	lut_base = SEQID_FAST_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_FAST) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD1, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_FAST_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
> +	/* Dual Read */
> +	lut_base = SEQID_DUAL_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_2) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD2, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_DUAL_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
> +	/* Quad Read */
> +	lut_base = SEQID_QUAD_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_4) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_QUAD_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
>  	fsl_qspi_lock_lut(q);
>  }
>  
> @@ -483,8 +515,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
>  {
>  	switch (cmd) {
> -	case SPINOR_OP_READ_1_1_4:
> -		return SEQID_QUAD_READ;
> +	case SPINOR_OP_READ:
> +		return SEQID_READ;
>  	case SPINOR_OP_WREN:
>  		return SEQID_WREN;
>  	case SPINOR_OP_WRDI:
> @@ -507,6 +539,12 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
>  		return SEQID_EN4B;
>  	case SPINOR_OP_BRWR:
>  		return SEQID_BRWR;
> +	case SPINOR_OP_READ_FAST:
> +		return SEQID_FAST_READ;
> +	case SPINOR_OP_READ_1_1_2:
> +		return SEQID_DUAL_READ;
> +	case SPINOR_OP_READ_1_1_4:
> +		return SEQID_QUAD_READ;
>  	default:
>  		if (cmd == q->nor[0].erase_opcode)
>  			return SEQID_SE;
> @@ -676,11 +714,16 @@ static void fsl_qspi_set_map_addr(struct fsl_qspi *q)
>   * the memcpy to read the data directly. A "missed" access to the buffer
>   * causes the controller to clear the buffer, and use the sequence pointed
>   * by the QUADSPI_BFGENCR[SEQID] to initiate a read from the flash.
> + *
> + * When using two ports, the SEQID to use for one port might differ from
> + * the one to use for the other (e.g., if one port can do 4-pad reads but
> + * the other cannot). So we set up a basic mode here (SEQID_READ) and we
> + * will set up the proper SEQID for the port right before doing the AHB
> + * access(es).
>   */
>  static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  {
>  	void __iomem *base = q->iobase;
> -	int seqid;
>  
>  	/* AHB configuration for access buffer 0/1/2 .*/
>  	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
> @@ -688,11 +731,13 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
>  	/*
>  	 * Set ADATSZ with the maximum AHB buffer size to improve the
> -	 * read performance.
> +	 * read performance, except when the controller should not use
> +	 * more than half its RX fifo in AHB reads, in which case read
> +	 * size is given in the LUT FSL_READ instructions.
>  	 */
>  	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
> -			((q->devtype_data->ahb_buf_size / 8)
> -			<< QUADSPI_BUF3CR_ADATSZ_SHIFT),
> +			(needs_half_fifo(q)? 0 : q->devtype_data->ahb_buf_size / 8)
> +	        << QUADSPI_BUF3CR_ADATSZ_SHIFT,
>  			base + QUADSPI_BUF3CR);
>  
>  	/* We only use the buffer3 */
> @@ -700,9 +745,8 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
>  	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
>  
> -	/* Set the default lut sequence for AHB Read. */
> -	seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
> -	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> +	/* Set the default lut sequence for AHB Read to READ. */
> +	qspi_writel(q, SEQID_READ << QUADSPI_BFGENCR_SEQID_SHIFT,
>  		q->iobase + QUADSPI_BFGENCR);
>  }
>  
> @@ -841,6 +885,7 @@ static int fsl_qspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
>  		return ret;
>  
>  	fsl_qspi_read_data(q, len, buf);
> +
>  	return 0;
>  }
>  
> @@ -887,11 +932,27 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  {
>  	struct fsl_qspi *q = nor->priv;
>  	u8 cmd = nor->read_opcode;
> +	uint32_t seqid;
> +	size_t qlen = q->nor_size * 4;
> +	int nor_idx = nor - q->nor;
> +	size_t nor_ofs = q->nor_size * nor_idx;
> +
> +	/* Set the actual lut sequence for AHB Read from the considered nor. */
> +	seqid = fsl_qspi_get_seqid(q, nor->read_opcode);
> +	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> +		q->iobase + QUADSPI_BFGENCR);
> +	/* Reset the AHB sequence pointer */
> +	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC_MASK, q->iobase + QUADSPI_SPTRCLR);
> +    /* make sure the Rx buffer is read through AHB, not IP */
> +	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, q->iobase + QUADSPI_RBCT);
> +
> +	/* set the chip address for READID */
> +	fsl_qspi_set_base_addr(q, q->nor);
>  
>  	/* if necessary,ioremap buffer before AHB read, */
>  	if (!q->ahb_addr) {
> -		q->memmap_offs = q->chip_base_addr + from;
> -		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> +		q->memmap_offs = q->chip_base_addr;
> +		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
>  
>  		q->ahb_addr = ioremap_nocache(
>  				q->memmap_phy + q->memmap_offs,
> @@ -906,8 +967,8 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  			q->memmap_offs + q->memmap_len) {
>  		iounmap(q->ahb_addr);
>  
> -		q->memmap_offs = q->chip_base_addr + from;
> -		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> +		q->memmap_offs = q->chip_base_addr;
> +		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
>  		q->ahb_addr = ioremap_nocache(
>  				q->memmap_phy + q->memmap_offs,
>  				q->memmap_len);
> @@ -917,13 +978,11 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  		}
>  	}
>  
> -	dev_dbg(q->dev, "cmd [%x],read from %p, len:%zd\n",
> -		cmd, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> -		len);
> +	dev_dbg(q->dev, "cmd [%x], read from %p, len:%zd\n",
> +		cmd, q->ahb_addr + nor_ofs + from, len);
>  
>  	/* Read out the data directly from the AHB buffer.*/
> -	memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> -		len);
> +	memcpy(buf, q->ahb_addr + nor_ofs + from, len);
>  
>  	return len;
>  }
> @@ -980,6 +1039,11 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  	struct spi_nor *nor;
>  	struct mtd_info *mtd;
>  	int ret, i = 0;
> +	const struct of_device_id *of_id =
> +			of_match_device(fsl_qspi_dt_ids, &pdev->dev);
> +	enum read_mode readmode;
> +	unsigned int buswidth;
> +	u32 qspimaxfreq, spimaxfreq;
>  
>  	q = devm_kzalloc(dev, sizeof(*q), GFP_KERNEL);
>  	if (!q)
> @@ -1050,6 +1114,9 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  
>  	mutex_init(&q->lock);
>  
> +	/* Running MIN of all "spi-max-frequency' values present in subnodes */
> +	qspimaxfreq = INT_MAX;
> +
>  	/* iterate the subnodes. */
>  	for_each_available_child_of_node(dev->of_node, np) {
>  		/* skip the holes */
> @@ -1073,15 +1140,44 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  		nor->prepare = fsl_qspi_prep;
>  		nor->unprepare = fsl_qspi_unprep;
>  
> +		/* Update running MIN of subnode max frequencies */
>  		ret = of_property_read_u32(np, "spi-max-frequency",
> -				&q->clk_rate);
> +				&spimaxfreq);
>  		if (ret < 0)
>  			goto mutex_failed;
> +		qspimaxfreq = (qspimaxfreq > spimaxfreq) ? spimaxfreq: qspimaxfreq;
>  
>  		/* set the chip address for READID */
>  		fsl_qspi_set_base_addr(q, nor);
>  
> -		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
> +		/* scan using the proper read mode for this subnode */
> +		if (of_property_read_u32(np, "spi-bus-width", &buswidth)<0) {
> +			nor->read_opcode = SPINOR_OP_READ;
> +			readmode = SPI_NOR_NORMAL;
> +		} else {
> +			switch (buswidth) {
> +			case 4:
> +				nor->read_opcode = SPINOR_OP_READ_1_1_4;
> +				readmode = SPI_NOR_QUAD;
> +				break;
> +			case 2:
> +				nor->read_opcode = SPINOR_OP_READ_1_1_2;
> +				readmode = SPI_NOR_DUAL;
> +				break;
> +			case 1:
> +				nor->read_opcode = SPINOR_OP_READ_FAST;
> +				readmode = SPI_NOR_FAST;
> +				break;
> +			default:
> +				dev_warn(q->dev,
> +					"ignoring spi-bus-width=%d (should be 1, 2 or 4)",
> +					buswidth);
> +				nor->read_opcode = SPINOR_OP_READ;
> +				readmode = SPI_NOR_NORMAL;
> +			}
> +		}
> +
> +		ret = spi_nor_scan(nor, NULL, readmode);
>  		if (ret)
>  			goto mutex_failed;
>  
> @@ -1112,6 +1208,10 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  		i++;
>  	}
>  
> +	/* If we know the lowest subnode frequency, override driver default. */
> +	if (qspimaxfreq<INT_MAX)
> +		q->clk_rate = qspimaxfreq;
> +
>  	/* finish the rest init. */
>  	ret = fsl_qspi_nor_setup_last(q);
>  	if (ret)
>
Cyrille Pitchen Oct. 19, 2016, 4:42 p.m. UTC | #5
Hi Albert,

+Yunhui

Le 27/09/2016 à 07:59, Albert ARIBAUD (3ADEV) a écrit :
> Fix QUAD read LUT sequence missing JMP_ON_CS which caused
> read corruptions with non-1K-size reads on BK4R1 machine.
> 
> Add NORMAL, DUAL and FAST read sequences.
> 
> Introduce spi-bus-width property for bus subnodes, to
> specify per-bus capability to use NORMAL, FAST, DUAL,
> and/or QUAD reads.
> 
> Also:
> 
> Simplify ioremap/memcpy usage.
> 
> Make controller clock frequency equal to the lowest of
> subnode max frequencies instead of the last one.
> 
> Limit Vybrid to use only half its RX fifo, using more will
> cause corrupt reads.
> 
> Signed-off-by: Albert ARIBAUD (3ADEV) <albert.aribaud@3adev.fr>
> ---
>  drivers/mtd/spi-nor/fsl-quadspi.c | 204 ++++++++++++++++++++++++++++----------
>  1 file changed, 152 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
> index 5c82e4e..9811cf2 100644
> --- a/drivers/mtd/spi-nor/fsl-quadspi.c
> +++ b/drivers/mtd/spi-nor/fsl-quadspi.c
> @@ -41,6 +41,8 @@
>  #define QUADSPI_QUIRK_TKT253890		(1 << 2)
>  /* Controller cannot wake up from wait mode, TKT245618 */
>  #define QUADSPI_QUIRK_TKT245618         (1 << 3)
> +/* Controller can only read half a rx fifo through AHB */
> +#define QUADSPI_QUIRK_AHB_READ_HALF_FIFO  (1 << 4)
>  
>  /* The registers */
>  #define QUADSPI_MCR			0x00
> @@ -117,6 +119,10 @@
>  #define QUADSPI_FR			0x160
>  #define QUADSPI_FR_TFF_MASK		0x1
>  
> +#define QUADSPI_SPTRCLR			0x15c
> +#define QUADSPI_SPTRCLR_BFPTRC_SHIFT		0
> +#define QUADSPI_SPTRCLR_BFPTRC_MASK		(0x1 << QUADSPI_SPTRCLR_BFPTRC_SHIFT)
> +
>  #define QUADSPI_SFA1AD			0x180
>  #define QUADSPI_SFA2AD			0x184
>  #define QUADSPI_SFB1AD			0x188
> @@ -193,7 +199,7 @@
>  #define QUADSPI_LUT_NUM		64
>  
>  /* SEQID -- we can have 16 seqids at most. */
> -#define SEQID_QUAD_READ		0
> +#define SEQID_READ		0
>  #define SEQID_WREN		1
>  #define SEQID_WRDI		2
>  #define SEQID_RDSR		3
> @@ -205,6 +211,9 @@
>  #define SEQID_RDCR		9
>  #define SEQID_EN4B		10
>  #define SEQID_BRWR		11
> +#define SEQID_FAST_READ		12
> +#define SEQID_DUAL_READ		13
> +#define SEQID_QUAD_READ		14
>  
>  #define QUADSPI_MIN_IOMAP SZ_4M
>  
> @@ -229,7 +238,8 @@ static struct fsl_qspi_devtype_data vybrid_data = {
>  	.rxfifo = 128,
>  	.txfifo = 64,
>  	.ahb_buf_size = 1024,
> -	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN,
> +	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN
> +		       | QUADSPI_QUIRK_AHB_READ_HALF_FIFO,
>  };
>  
>  static struct fsl_qspi_devtype_data imx6sx_data = {
> @@ -309,6 +319,11 @@ static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
>  	return q->devtype_data->driver_data & QUADSPI_QUIRK_TKT245618;
>  }
>  
> +static inline int needs_half_fifo(struct fsl_qspi *q)
> +{
> +	return q->devtype_data->driver_data & QUADSPI_QUIRK_AHB_READ_HALF_FIFO;
> +}
> +
>  /*
>   * R/W functions for big- or little-endian registers:
>   * The qSPI controller's endian is independent of the CPU core's endian.
> @@ -373,8 +388,20 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	void __iomem *base = q->iobase;
>  	int rxfifo = q->devtype_data->rxfifo;
>  	u32 lut_base;
> -	u8 cmd, addrlen, dummy;
> +	u8 cmd, addrlen;
>  	int i;
> +	/* use 8 dummy cycles for all fast operations */
> +	const int dummy = 8;
> +
> +	/* use only half fifo if controller needs that */
> +	if (needs_half_fifo(q))
> +		rxfifo /= 2;
> +
> +    /* use 24-bit addresses for up to 16MB, 32-bit above 16MB */
> +	if (q->nor_size <= SZ_16M)
> +		addrlen = ADDR24BIT;
> +	else
> +		addrlen = ADDR32BIT;
>  
>  	fsl_qspi_unlock_lut(q);
>  
> @@ -382,24 +409,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	for (i = 0; i < QUADSPI_LUT_NUM; i++)
>  		qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
>  
> -	/* Quad Read */
> -	lut_base = SEQID_QUAD_READ * 4;
> -
> -	if (q->nor_size <= SZ_16M) {
> -		cmd = SPINOR_OP_READ_1_1_4;
> -		addrlen = ADDR24BIT;
> -		dummy = 8;
> -	} else {
> -		/* use the 4-byte address */
> -		cmd = SPINOR_OP_READ_1_1_4;
> -		addrlen = ADDR32BIT;
> -		dummy = 8;
> -	}
> -
> -	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> -			base + QUADSPI_LUT(lut_base));
> -	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> -			base + QUADSPI_LUT(lut_base + 1));
> +	/* Read */
> +	lut_base = SEQID_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(FSL_READ, PAD1, rxfifo) | LUT1(JMP_ON_CS, PAD1, SEQID_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 1));

It seems that both Yunhui and you work on the same topic, maybe you can
synchronize?
https://patchwork.ozlabs.org/patch/660356/

>  
>  	/* Write enable */
>  	lut_base = SEQID_WREN * 4;
> @@ -409,16 +424,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	/* Page Program */
>  	lut_base = SEQID_PP * 4;
>  
> -	if (q->nor_size <= SZ_16M) {
> -		cmd = SPINOR_OP_PP;
> -		addrlen = ADDR24BIT;
> -	} else {
> -		/* use the 4-byte address */
> -		cmd = SPINOR_OP_PP;
> -		addrlen = ADDR32BIT;
> -	}
> -
> -	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_PP) | LUT1(ADDR, PAD1, addrlen),
>  			base + QUADSPI_LUT(lut_base));
>  	qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
>  			base + QUADSPI_LUT(lut_base + 1));
> @@ -431,7 +437,6 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  
>  	/* Erase a sector */
>  	lut_base = SEQID_SE * 4;
> -
>  	cmd = q->nor[0].erase_opcode;
>  	addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
>  
> @@ -476,6 +481,33 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_BRWR),
>  			base + QUADSPI_LUT(lut_base));
>  
> +	/* Fast Read */
> +	lut_base = SEQID_FAST_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_FAST) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD1, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_FAST_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
> +	/* Dual Read */
> +	lut_base = SEQID_DUAL_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_2) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD2, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_DUAL_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
> +	/* Quad Read */
> +	lut_base = SEQID_QUAD_READ * 4;
> +	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_4) | LUT1(ADDR, PAD1, addrlen),
> +		base + QUADSPI_LUT(lut_base));
> +	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
> +		base + QUADSPI_LUT(lut_base + 1));
> +	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_QUAD_READ * 8),
> +		base + QUADSPI_LUT(lut_base + 2));
> +
>  	fsl_qspi_lock_lut(q);
>  }
>  
> @@ -483,8 +515,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
>  static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
>  {
>  	switch (cmd) {
> -	case SPINOR_OP_READ_1_1_4:
> -		return SEQID_QUAD_READ;
> +	case SPINOR_OP_READ:
> +		return SEQID_READ;
>  	case SPINOR_OP_WREN:
>  		return SEQID_WREN;
>  	case SPINOR_OP_WRDI:
> @@ -507,6 +539,12 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
>  		return SEQID_EN4B;
>  	case SPINOR_OP_BRWR:
>  		return SEQID_BRWR;
> +	case SPINOR_OP_READ_FAST:
> +		return SEQID_FAST_READ;
> +	case SPINOR_OP_READ_1_1_2:
> +		return SEQID_DUAL_READ;
> +	case SPINOR_OP_READ_1_1_4:
> +		return SEQID_QUAD_READ;

I don't think it is a good thing to select the relevant "READ" LUT entry
according to the command op code. What if spi-nor.c introduces new op codes?

Maybe you could index the LUT entries by functions:
- READ_REG: the LUT op code would be dynamically updated by your
            .read_reg(nor, opcode, ...) implementation according to opcode.
- WRITE_REG: the same as above with the .write_reg(nor, opcode, ...) hook.
- READ_SLAVE1: read memory from the first SPI-NOR memory.
- WRITE_SLAVE1: write into the first SPI-NOR memory.
- READ_SLAVE2: read memory form the 2nd SPI-NOR memory.
...
- WRITE_SLAVEN: write into the N-th SPI-NOR memory.

Also be aware even for the .read() hook the nor->read_opcode might changes
between calls!

I don't know whether those comments fit your actual needs and the Freescale SPI
controller hardware constraints but I'm always worried about the ease to maintain
SPI-NOR controller drivers when they are not "op code agnostic".

Best regards,

Cyrille

>  	default:
>  		if (cmd == q->nor[0].erase_opcode)
>  			return SEQID_SE;
> @@ -676,11 +714,16 @@ static void fsl_qspi_set_map_addr(struct fsl_qspi *q)
>   * the memcpy to read the data directly. A "missed" access to the buffer
>   * causes the controller to clear the buffer, and use the sequence pointed
>   * by the QUADSPI_BFGENCR[SEQID] to initiate a read from the flash.
> + *
> + * When using two ports, the SEQID to use for one port might differ from
> + * the one to use for the other (e.g., if one port can do 4-pad reads but
> + * the other cannot). So we set up a basic mode here (SEQID_READ) and we
> + * will set up the proper SEQID for the port right before doing the AHB
> + * access(es).
>   */
>  static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  {
>  	void __iomem *base = q->iobase;
> -	int seqid;
>  
>  	/* AHB configuration for access buffer 0/1/2 .*/
>  	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
> @@ -688,11 +731,13 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
>  	/*
>  	 * Set ADATSZ with the maximum AHB buffer size to improve the
> -	 * read performance.
> +	 * read performance, except when the controller should not use
> +	 * more than half its RX fifo in AHB reads, in which case read
> +	 * size is given in the LUT FSL_READ instructions.
>  	 */
>  	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
> -			((q->devtype_data->ahb_buf_size / 8)
> -			<< QUADSPI_BUF3CR_ADATSZ_SHIFT),
> +			(needs_half_fifo(q)? 0 : q->devtype_data->ahb_buf_size / 8)
> +	        << QUADSPI_BUF3CR_ADATSZ_SHIFT,
>  			base + QUADSPI_BUF3CR);
>  
>  	/* We only use the buffer3 */
> @@ -700,9 +745,8 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
>  	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
>  	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
>  
> -	/* Set the default lut sequence for AHB Read. */
> -	seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
> -	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> +	/* Set the default lut sequence for AHB Read to READ. */
> +	qspi_writel(q, SEQID_READ << QUADSPI_BFGENCR_SEQID_SHIFT,
>  		q->iobase + QUADSPI_BFGENCR);
>  }
>  
> @@ -841,6 +885,7 @@ static int fsl_qspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
>  		return ret;
>  
>  	fsl_qspi_read_data(q, len, buf);
> +
>  	return 0;
>  }
>  
> @@ -887,11 +932,27 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  {
>  	struct fsl_qspi *q = nor->priv;
>  	u8 cmd = nor->read_opcode;
> +	uint32_t seqid;
> +	size_t qlen = q->nor_size * 4;
> +	int nor_idx = nor - q->nor;
> +	size_t nor_ofs = q->nor_size * nor_idx;
> +
> +	/* Set the actual lut sequence for AHB Read from the considered nor. */
> +	seqid = fsl_qspi_get_seqid(q, nor->read_opcode);
> +	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
> +		q->iobase + QUADSPI_BFGENCR);
> +	/* Reset the AHB sequence pointer */
> +	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC_MASK, q->iobase + QUADSPI_SPTRCLR);
> +    /* make sure the Rx buffer is read through AHB, not IP */
> +	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, q->iobase + QUADSPI_RBCT);
> +
> +	/* set the chip address for READID */
> +	fsl_qspi_set_base_addr(q, q->nor);
>  
>  	/* if necessary,ioremap buffer before AHB read, */
>  	if (!q->ahb_addr) {
> -		q->memmap_offs = q->chip_base_addr + from;
> -		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> +		q->memmap_offs = q->chip_base_addr;
> +		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
>  
>  		q->ahb_addr = ioremap_nocache(
>  				q->memmap_phy + q->memmap_offs,
> @@ -906,8 +967,8 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  			q->memmap_offs + q->memmap_len) {
>  		iounmap(q->ahb_addr);
>  
> -		q->memmap_offs = q->chip_base_addr + from;
> -		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
> +		q->memmap_offs = q->chip_base_addr;
> +		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
>  		q->ahb_addr = ioremap_nocache(
>  				q->memmap_phy + q->memmap_offs,
>  				q->memmap_len);
> @@ -917,13 +978,11 @@ static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
>  		}
>  	}
>  
> -	dev_dbg(q->dev, "cmd [%x],read from %p, len:%zd\n",
> -		cmd, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> -		len);
> +	dev_dbg(q->dev, "cmd [%x], read from %p, len:%zd\n",
> +		cmd, q->ahb_addr + nor_ofs + from, len);
>  
>  	/* Read out the data directly from the AHB buffer.*/
> -	memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
> -		len);
> +	memcpy(buf, q->ahb_addr + nor_ofs + from, len);
>  
>  	return len;
>  }
> @@ -980,6 +1039,11 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  	struct spi_nor *nor;
>  	struct mtd_info *mtd;
>  	int ret, i = 0;
> +	const struct of_device_id *of_id =
> +			of_match_device(fsl_qspi_dt_ids, &pdev->dev);
> +	enum read_mode readmode;
> +	unsigned int buswidth;
> +	u32 qspimaxfreq, spimaxfreq;
>  
>  	q = devm_kzalloc(dev, sizeof(*q), GFP_KERNEL);
>  	if (!q)
> @@ -1050,6 +1114,9 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  
>  	mutex_init(&q->lock);
>  
> +	/* Running MIN of all "spi-max-frequency' values present in subnodes */
> +	qspimaxfreq = INT_MAX;
> +
>  	/* iterate the subnodes. */
>  	for_each_available_child_of_node(dev->of_node, np) {
>  		/* skip the holes */
> @@ -1073,15 +1140,44 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  		nor->prepare = fsl_qspi_prep;
>  		nor->unprepare = fsl_qspi_unprep;
>  
> +		/* Update running MIN of subnode max frequencies */
>  		ret = of_property_read_u32(np, "spi-max-frequency",
> -				&q->clk_rate);
> +				&spimaxfreq);
>  		if (ret < 0)
>  			goto mutex_failed;
> +		qspimaxfreq = (qspimaxfreq > spimaxfreq) ? spimaxfreq: qspimaxfreq;
>  
>  		/* set the chip address for READID */
>  		fsl_qspi_set_base_addr(q, nor);
>  
> -		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
> +		/* scan using the proper read mode for this subnode */
> +		if (of_property_read_u32(np, "spi-bus-width", &buswidth)<0) {
> +			nor->read_opcode = SPINOR_OP_READ;
> +			readmode = SPI_NOR_NORMAL;
> +		} else {
> +			switch (buswidth) {
> +			case 4:
> +				nor->read_opcode = SPINOR_OP_READ_1_1_4;
> +				readmode = SPI_NOR_QUAD;
> +				break;
> +			case 2:
> +				nor->read_opcode = SPINOR_OP_READ_1_1_2;
> +				readmode = SPI_NOR_DUAL;
> +				break;
> +			case 1:
> +				nor->read_opcode = SPINOR_OP_READ_FAST;
> +				readmode = SPI_NOR_FAST;
> +				break;
> +			default:
> +				dev_warn(q->dev,
> +					"ignoring spi-bus-width=%d (should be 1, 2 or 4)",
> +					buswidth);
> +				nor->read_opcode = SPINOR_OP_READ;
> +				readmode = SPI_NOR_NORMAL;
> +			}
> +		}
> +
> +		ret = spi_nor_scan(nor, NULL, readmode);
>  		if (ret)
>  			goto mutex_failed;
>  
> @@ -1112,6 +1208,10 @@ static int fsl_qspi_probe(struct platform_device *pdev)
>  		i++;
>  	}
>  
> +	/* If we know the lowest subnode frequency, override driver default. */
> +	if (qspimaxfreq<INT_MAX)
> +		q->clk_rate = qspimaxfreq;
> +
>  	/* finish the rest init. */
>  	ret = fsl_qspi_nor_setup_last(q);
>  	if (ret)
>
Albert ARIBAUD (3ADEV) Oct. 20, 2016, 7:16 a.m. UTC | #6
Hi Cyrille,

Le Wed, 19 Oct 2016 18:42:36 +0200, Cyrille Pitchen
<cyrille.pitchen@atmel.com> a écrit :

> Hi Albert,
> 
> +Yunhui


> It seems that both Yunhui and you work on the same topic, maybe you can
> synchronize?
> https://patchwork.ozlabs.org/patch/660356/

I am already preparing a v2 based over Yunhui's series. :)

> I don't think it is a good thing to select the relevant "READ" LUT entry
> according to the command op code. What if spi-nor.c introduces new op codes?
> 
> Maybe you could index the LUT entries by functions:
> - READ_REG: the LUT op code would be dynamically updated by your
>             .read_reg(nor, opcode, ...) implementation according to opcode.
> - WRITE_REG: the same as above with the .write_reg(nor, opcode, ...) hook.
> - READ_SLAVE1: read memory from the first SPI-NOR memory.
> - WRITE_SLAVE1: write into the first SPI-NOR memory.
> - READ_SLAVE2: read memory form the 2nd SPI-NOR memory.
> ...
> - WRITE_SLAVEN: write into the N-th SPI-NOR memory.

Actually, my needs could be met using Han Xu's dynamic LUT entries
without the need to introduce 'virtual' opcodes (and without having
to 'leak back' those new opcodes into other kernel code portions).

The per-slave differences (in my case, bus width essentially) can be
managed though DT entries and corresponding per-nor flags in the driver.

> Also be aware even for the .read() hook the nor->read_opcode might changes
> between calls!

Noted -- I'll check that the driver always use the current NOR codes.

> I don't know whether those comments fit your actual needs and the Freescale SPI
> controller hardware constraints but I'm always worried about the ease to maintain
> SPI-NOR controller drivers when they are not "op code agnostic".

Understood and agreed.

Thanks for yuor feedback!
 
> Best regards,
> 
> Cyrille

Cordialement,
Albert ARIBAUD
3ADEV
Han Xu Oct. 20, 2016, 2:54 p.m. UTC | #7

diff mbox

Patch

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 5c82e4e..9811cf2 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -41,6 +41,8 @@ 
 #define QUADSPI_QUIRK_TKT253890		(1 << 2)
 /* Controller cannot wake up from wait mode, TKT245618 */
 #define QUADSPI_QUIRK_TKT245618         (1 << 3)
+/* Controller can only read half a rx fifo through AHB */
+#define QUADSPI_QUIRK_AHB_READ_HALF_FIFO  (1 << 4)
 
 /* The registers */
 #define QUADSPI_MCR			0x00
@@ -117,6 +119,10 @@ 
 #define QUADSPI_FR			0x160
 #define QUADSPI_FR_TFF_MASK		0x1
 
+#define QUADSPI_SPTRCLR			0x15c
+#define QUADSPI_SPTRCLR_BFPTRC_SHIFT		0
+#define QUADSPI_SPTRCLR_BFPTRC_MASK		(0x1 << QUADSPI_SPTRCLR_BFPTRC_SHIFT)
+
 #define QUADSPI_SFA1AD			0x180
 #define QUADSPI_SFA2AD			0x184
 #define QUADSPI_SFB1AD			0x188
@@ -193,7 +199,7 @@ 
 #define QUADSPI_LUT_NUM		64
 
 /* SEQID -- we can have 16 seqids at most. */
-#define SEQID_QUAD_READ		0
+#define SEQID_READ		0
 #define SEQID_WREN		1
 #define SEQID_WRDI		2
 #define SEQID_RDSR		3
@@ -205,6 +211,9 @@ 
 #define SEQID_RDCR		9
 #define SEQID_EN4B		10
 #define SEQID_BRWR		11
+#define SEQID_FAST_READ		12
+#define SEQID_DUAL_READ		13
+#define SEQID_QUAD_READ		14
 
 #define QUADSPI_MIN_IOMAP SZ_4M
 
@@ -229,7 +238,8 @@  static struct fsl_qspi_devtype_data vybrid_data = {
 	.rxfifo = 128,
 	.txfifo = 64,
 	.ahb_buf_size = 1024,
-	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN,
+	.driver_data = QUADSPI_QUIRK_SWAP_ENDIAN
+		       | QUADSPI_QUIRK_AHB_READ_HALF_FIFO,
 };
 
 static struct fsl_qspi_devtype_data imx6sx_data = {
@@ -309,6 +319,11 @@  static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
 	return q->devtype_data->driver_data & QUADSPI_QUIRK_TKT245618;
 }
 
+static inline int needs_half_fifo(struct fsl_qspi *q)
+{
+	return q->devtype_data->driver_data & QUADSPI_QUIRK_AHB_READ_HALF_FIFO;
+}
+
 /*
  * R/W functions for big- or little-endian registers:
  * The qSPI controller's endian is independent of the CPU core's endian.
@@ -373,8 +388,20 @@  static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	void __iomem *base = q->iobase;
 	int rxfifo = q->devtype_data->rxfifo;
 	u32 lut_base;
-	u8 cmd, addrlen, dummy;
+	u8 cmd, addrlen;
 	int i;
+	/* use 8 dummy cycles for all fast operations */
+	const int dummy = 8;
+
+	/* use only half fifo if controller needs that */
+	if (needs_half_fifo(q))
+		rxfifo /= 2;
+
+    /* use 24-bit addresses for up to 16MB, 32-bit above 16MB */
+	if (q->nor_size <= SZ_16M)
+		addrlen = ADDR24BIT;
+	else
+		addrlen = ADDR32BIT;
 
 	fsl_qspi_unlock_lut(q);
 
@@ -382,24 +409,12 @@  static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	for (i = 0; i < QUADSPI_LUT_NUM; i++)
 		qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
 
-	/* Quad Read */
-	lut_base = SEQID_QUAD_READ * 4;
-
-	if (q->nor_size <= SZ_16M) {
-		cmd = SPINOR_OP_READ_1_1_4;
-		addrlen = ADDR24BIT;
-		dummy = 8;
-	} else {
-		/* use the 4-byte address */
-		cmd = SPINOR_OP_READ_1_1_4;
-		addrlen = ADDR32BIT;
-		dummy = 8;
-	}
-
-	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
-			base + QUADSPI_LUT(lut_base));
-	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
-			base + QUADSPI_LUT(lut_base + 1));
+	/* Read */
+	lut_base = SEQID_READ * 4;
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ) | LUT1(ADDR, PAD1, addrlen),
+		base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(FSL_READ, PAD1, rxfifo) | LUT1(JMP_ON_CS, PAD1, SEQID_READ * 8),
+		base + QUADSPI_LUT(lut_base + 1));
 
 	/* Write enable */
 	lut_base = SEQID_WREN * 4;
@@ -409,16 +424,7 @@  static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	/* Page Program */
 	lut_base = SEQID_PP * 4;
 
-	if (q->nor_size <= SZ_16M) {
-		cmd = SPINOR_OP_PP;
-		addrlen = ADDR24BIT;
-	} else {
-		/* use the 4-byte address */
-		cmd = SPINOR_OP_PP;
-		addrlen = ADDR32BIT;
-	}
-
-	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_PP) | LUT1(ADDR, PAD1, addrlen),
 			base + QUADSPI_LUT(lut_base));
 	qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
 			base + QUADSPI_LUT(lut_base + 1));
@@ -431,7 +437,6 @@  static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Erase a sector */
 	lut_base = SEQID_SE * 4;
-
 	cmd = q->nor[0].erase_opcode;
 	addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
 
@@ -476,6 +481,33 @@  static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_BRWR),
 			base + QUADSPI_LUT(lut_base));
 
+	/* Fast Read */
+	lut_base = SEQID_FAST_READ * 4;
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_FAST) | LUT1(ADDR, PAD1, addrlen),
+		base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD1, rxfifo),
+		base + QUADSPI_LUT(lut_base + 1));
+	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_FAST_READ * 8),
+		base + QUADSPI_LUT(lut_base + 2));
+
+	/* Dual Read */
+	lut_base = SEQID_DUAL_READ * 4;
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_2) | LUT1(ADDR, PAD1, addrlen),
+		base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD2, rxfifo),
+		base + QUADSPI_LUT(lut_base + 1));
+	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_DUAL_READ * 8),
+		base + QUADSPI_LUT(lut_base + 2));
+
+	/* Quad Read */
+	lut_base = SEQID_QUAD_READ * 4;
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_READ_1_1_4) | LUT1(ADDR, PAD1, addrlen),
+		base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
+		base + QUADSPI_LUT(lut_base + 1));
+	qspi_writel(q, LUT0(JMP_ON_CS, PAD1, SEQID_QUAD_READ * 8),
+		base + QUADSPI_LUT(lut_base + 2));
+
 	fsl_qspi_lock_lut(q);
 }
 
@@ -483,8 +515,8 @@  static void fsl_qspi_init_lut(struct fsl_qspi *q)
 static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
 	switch (cmd) {
-	case SPINOR_OP_READ_1_1_4:
-		return SEQID_QUAD_READ;
+	case SPINOR_OP_READ:
+		return SEQID_READ;
 	case SPINOR_OP_WREN:
 		return SEQID_WREN;
 	case SPINOR_OP_WRDI:
@@ -507,6 +539,12 @@  static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 		return SEQID_EN4B;
 	case SPINOR_OP_BRWR:
 		return SEQID_BRWR;
+	case SPINOR_OP_READ_FAST:
+		return SEQID_FAST_READ;
+	case SPINOR_OP_READ_1_1_2:
+		return SEQID_DUAL_READ;
+	case SPINOR_OP_READ_1_1_4:
+		return SEQID_QUAD_READ;
 	default:
 		if (cmd == q->nor[0].erase_opcode)
 			return SEQID_SE;
@@ -676,11 +714,16 @@  static void fsl_qspi_set_map_addr(struct fsl_qspi *q)
  * the memcpy to read the data directly. A "missed" access to the buffer
  * causes the controller to clear the buffer, and use the sequence pointed
  * by the QUADSPI_BFGENCR[SEQID] to initiate a read from the flash.
+ *
+ * When using two ports, the SEQID to use for one port might differ from
+ * the one to use for the other (e.g., if one port can do 4-pad reads but
+ * the other cannot). So we set up a basic mode here (SEQID_READ) and we
+ * will set up the proper SEQID for the port right before doing the AHB
+ * access(es).
  */
 static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
 {
 	void __iomem *base = q->iobase;
-	int seqid;
 
 	/* AHB configuration for access buffer 0/1/2 .*/
 	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
@@ -688,11 +731,13 @@  static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
 	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
 	/*
 	 * Set ADATSZ with the maximum AHB buffer size to improve the
-	 * read performance.
+	 * read performance, except when the controller should not use
+	 * more than half its RX fifo in AHB reads, in which case read
+	 * size is given in the LUT FSL_READ instructions.
 	 */
 	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
-			((q->devtype_data->ahb_buf_size / 8)
-			<< QUADSPI_BUF3CR_ADATSZ_SHIFT),
+			(needs_half_fifo(q)? 0 : q->devtype_data->ahb_buf_size / 8)
+	        << QUADSPI_BUF3CR_ADATSZ_SHIFT,
 			base + QUADSPI_BUF3CR);
 
 	/* We only use the buffer3 */
@@ -700,9 +745,8 @@  static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
 	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
 	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
 
-	/* Set the default lut sequence for AHB Read. */
-	seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
-	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
+	/* Set the default lut sequence for AHB Read to READ. */
+	qspi_writel(q, SEQID_READ << QUADSPI_BFGENCR_SEQID_SHIFT,
 		q->iobase + QUADSPI_BFGENCR);
 }
 
@@ -841,6 +885,7 @@  static int fsl_qspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
 		return ret;
 
 	fsl_qspi_read_data(q, len, buf);
+
 	return 0;
 }
 
@@ -887,11 +932,27 @@  static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
 {
 	struct fsl_qspi *q = nor->priv;
 	u8 cmd = nor->read_opcode;
+	uint32_t seqid;
+	size_t qlen = q->nor_size * 4;
+	int nor_idx = nor - q->nor;
+	size_t nor_ofs = q->nor_size * nor_idx;
+
+	/* Set the actual lut sequence for AHB Read from the considered nor. */
+	seqid = fsl_qspi_get_seqid(q, nor->read_opcode);
+	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
+		q->iobase + QUADSPI_BFGENCR);
+	/* Reset the AHB sequence pointer */
+	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC_MASK, q->iobase + QUADSPI_SPTRCLR);
+    /* make sure the Rx buffer is read through AHB, not IP */
+	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, q->iobase + QUADSPI_RBCT);
+
+	/* set the chip address for READID */
+	fsl_qspi_set_base_addr(q, q->nor);
 
 	/* if necessary,ioremap buffer before AHB read, */
 	if (!q->ahb_addr) {
-		q->memmap_offs = q->chip_base_addr + from;
-		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
+		q->memmap_offs = q->chip_base_addr;
+		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
 
 		q->ahb_addr = ioremap_nocache(
 				q->memmap_phy + q->memmap_offs,
@@ -906,8 +967,8 @@  static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
 			q->memmap_offs + q->memmap_len) {
 		iounmap(q->ahb_addr);
 
-		q->memmap_offs = q->chip_base_addr + from;
-		q->memmap_len = len > QUADSPI_MIN_IOMAP ? len : QUADSPI_MIN_IOMAP;
+		q->memmap_offs = q->chip_base_addr;
+		q->memmap_len = qlen > QUADSPI_MIN_IOMAP ? qlen : QUADSPI_MIN_IOMAP;
 		q->ahb_addr = ioremap_nocache(
 				q->memmap_phy + q->memmap_offs,
 				q->memmap_len);
@@ -917,13 +978,11 @@  static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from,
 		}
 	}
 
-	dev_dbg(q->dev, "cmd [%x],read from %p, len:%zd\n",
-		cmd, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
-		len);
+	dev_dbg(q->dev, "cmd [%x], read from %p, len:%zd\n",
+		cmd, q->ahb_addr + nor_ofs + from, len);
 
 	/* Read out the data directly from the AHB buffer.*/
-	memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs,
-		len);
+	memcpy(buf, q->ahb_addr + nor_ofs + from, len);
 
 	return len;
 }
@@ -980,6 +1039,11 @@  static int fsl_qspi_probe(struct platform_device *pdev)
 	struct spi_nor *nor;
 	struct mtd_info *mtd;
 	int ret, i = 0;
+	const struct of_device_id *of_id =
+			of_match_device(fsl_qspi_dt_ids, &pdev->dev);
+	enum read_mode readmode;
+	unsigned int buswidth;
+	u32 qspimaxfreq, spimaxfreq;
 
 	q = devm_kzalloc(dev, sizeof(*q), GFP_KERNEL);
 	if (!q)
@@ -1050,6 +1114,9 @@  static int fsl_qspi_probe(struct platform_device *pdev)
 
 	mutex_init(&q->lock);
 
+	/* Running MIN of all "spi-max-frequency' values present in subnodes */
+	qspimaxfreq = INT_MAX;
+
 	/* iterate the subnodes. */
 	for_each_available_child_of_node(dev->of_node, np) {
 		/* skip the holes */
@@ -1073,15 +1140,44 @@  static int fsl_qspi_probe(struct platform_device *pdev)
 		nor->prepare = fsl_qspi_prep;
 		nor->unprepare = fsl_qspi_unprep;
 
+		/* Update running MIN of subnode max frequencies */
 		ret = of_property_read_u32(np, "spi-max-frequency",
-				&q->clk_rate);
+				&spimaxfreq);
 		if (ret < 0)
 			goto mutex_failed;
+		qspimaxfreq = (qspimaxfreq > spimaxfreq) ? spimaxfreq: qspimaxfreq;
 
 		/* set the chip address for READID */
 		fsl_qspi_set_base_addr(q, nor);
 
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+		/* scan using the proper read mode for this subnode */
+		if (of_property_read_u32(np, "spi-bus-width", &buswidth)<0) {
+			nor->read_opcode = SPINOR_OP_READ;
+			readmode = SPI_NOR_NORMAL;
+		} else {
+			switch (buswidth) {
+			case 4:
+				nor->read_opcode = SPINOR_OP_READ_1_1_4;
+				readmode = SPI_NOR_QUAD;
+				break;
+			case 2:
+				nor->read_opcode = SPINOR_OP_READ_1_1_2;
+				readmode = SPI_NOR_DUAL;
+				break;
+			case 1:
+				nor->read_opcode = SPINOR_OP_READ_FAST;
+				readmode = SPI_NOR_FAST;
+				break;
+			default:
+				dev_warn(q->dev,
+					"ignoring spi-bus-width=%d (should be 1, 2 or 4)",
+					buswidth);
+				nor->read_opcode = SPINOR_OP_READ;
+				readmode = SPI_NOR_NORMAL;
+			}
+		}
+
+		ret = spi_nor_scan(nor, NULL, readmode);
 		if (ret)
 			goto mutex_failed;
 
@@ -1112,6 +1208,10 @@  static int fsl_qspi_probe(struct platform_device *pdev)
 		i++;
 	}
 
+	/* If we know the lowest subnode frequency, override driver default. */
+	if (qspimaxfreq<INT_MAX)
+		q->clk_rate = qspimaxfreq;
+
 	/* finish the rest init. */
 	ret = fsl_qspi_nor_setup_last(q);
 	if (ret)