Patchwork [U-Boot,v3] spi/cadence: Adding Cadence SPI driver support for SOCFPGA

login
register
mail settings
Submitter Chin Liang See
Date Jan. 10, 2014, 5:39 p.m.
Message ID <1389375548-9167-1-git-send-email-clsee@altera.com>
Download mbox | patch
Permalink /patch/309359/
State Superseded
Delegated to: Jagannadha Sutradharudu Teki
Headers show

Comments

Chin Liang See - Jan. 10, 2014, 5:39 p.m.
Add Cadence QSPI driver support for Altera SOCFPGA. The driver
requires information such as clocks and timing from the platform's
configuration header file in the include/configs folder.

Signed-off-by: Chin Liang See <clsee@altera.com>
Cc: Jagan Teki <jagannadh.teki@gmail.com>
Cc: Gerhard Sittig <gsi@denx.de>
---
Changes for v3
- Moved the documentation from doc folder to driver
- Documented down macro specific to driver only
Changes for v2
- Combine driver into single C file instead of 2
- Added documentation on the macro used
- Using structure for registers instead of macro
---
 drivers/spi/Makefile       |    1 +
 drivers/spi/cadence_qspi.c | 1018 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/spi/cadence_qspi.h |  196 +++++++++
 3 files changed, 1215 insertions(+)
 create mode 100644 drivers/spi/cadence_qspi.c
 create mode 100644 drivers/spi/cadence_qspi.h
Chin Liang See - Feb. 4, 2014, 7:12 p.m.
Hi Jagan,

As there are no further comments, I would need your help to apply this
patch. Thanks, and I appreciate your support.

Chin Liang


On Fri, 2014-01-10 at 11:39 -0600, Chin Liang See wrote:
> Add Cadence QSPI driver support for Altera SOCFPGA. The driver
> requires information such as clocks and timing from the platform's
> configuration header file in the include/configs folder.
> 
> Signed-off-by: Chin Liang See <clsee@altera.com>
> Cc: Jagan Teki <jagannadh.teki@gmail.com>
> Cc: Gerhard Sittig <gsi@denx.de>
> ---
> Changes for v3
> - Moved the documentation from doc folder to driver
> - Documented down macro specific to driver only
> Changes for v2
> - Combine driver into single C file instead of 2
> - Added documentation on the macro used
> - Using structure for registers instead of macro
> ---
>  drivers/spi/Makefile       |    1 +
>  drivers/spi/cadence_qspi.c | 1018 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/spi/cadence_qspi.h |  196 +++++++++
>  3 files changed, 1215 insertions(+)
>  create mode 100644 drivers/spi/cadence_qspi.c
>  create mode 100644 drivers/spi/cadence_qspi.h
> 
> diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
> index ed4ecd7..b8d56ea 100644
> --- a/drivers/spi/Makefile
> +++ b/drivers/spi/Makefile
> @@ -15,6 +15,7 @@ obj-$(CONFIG_ATMEL_DATAFLASH_SPI) += atmel_dataflash_spi.o
>  obj-$(CONFIG_ATMEL_SPI) += atmel_spi.o
>  obj-$(CONFIG_BFIN_SPI) += bfin_spi.o
>  obj-$(CONFIG_BFIN_SPI6XX) += bfin_spi6xx.o
> +obj-$(CONFIG_CADENCE_QSPI) += cadence_qspi.o
>  obj-$(CONFIG_CF_SPI) += cf_spi.o
>  obj-$(CONFIG_CF_QSPI) += cf_qspi.o
>  obj-$(CONFIG_DAVINCI_SPI) += davinci_spi.o
> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
> new file mode 100644
> index 0000000..4712b45
> --- /dev/null
> +++ b/drivers/spi/cadence_qspi.c
> @@ -0,0 +1,1018 @@
> +/*
> + * (C) Copyright 2014 Altera Corporation <www.altera.com>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <asm/io.h>
> +#include <asm/errno.h>
> +#include <malloc.h>
> +#include <spi.h>
> +#include "cadence_qspi.h"
> +
> +static int qspi_is_init;
> +static unsigned int qspi_calibrated_hz;
> +static unsigned int qspi_calibrated_cs;
> +
> +static const struct cadence_qspi *cadence_qspi_base = (void *)QSPI_BASE;
> +
> +#define to_cadence_qspi_slave(s)		\
> +		container_of(s, struct cadence_qspi_slave, slave)
> +
> +#define CQSPI_CAL_DELAY(tdelay_ns, tref_ns, tsclk_ns)	\
> +	((((tdelay_ns) - (tsclk_ns)) / (tref_ns)))
> +
> +#define CQSPI_GET_WR_SRAM_LEVEL()		\
> +	((readl(&cadence_qspi_base->sramfill) >>	\
> +	CQSPI_REG_SRAMLEVEL_WR_LSB) & CQSPI_REG_SRAMLEVEL_WR_MASK)
> +
> +static unsigned int cadence_qspi_apb_cmd2addr(const unsigned char *addr_buf,
> +	unsigned int addr_width)
> +{
> +	unsigned int addr;
> +
> +	addr = (addr_buf[0] << 16) | (addr_buf[1] << 8) | addr_buf[2];
> +
> +	if (addr_width == 4)
> +		addr = (addr << 8) | addr_buf[3];
> +
> +	return addr;
> +}
> +
> +static void cadence_qspi_apb_read_fifo_data(void *dest,
> +	const void *src_ahb_addr, unsigned int bytes)
> +{
> +	unsigned int temp;
> +	int remaining = bytes;
> +	unsigned int *dest_ptr = (unsigned int *)dest;
> +	unsigned int *src_ptr = (unsigned int *)src_ahb_addr;
> +
> +	while (remaining > 0) {
> +		if (remaining >= CQSPI_FIFO_WIDTH) {
> +			*dest_ptr = readl(src_ptr);
> +			remaining -= CQSPI_FIFO_WIDTH;
> +		} else {
> +			/* dangling bytes */
> +			temp = readl(src_ptr);
> +			memcpy(dest_ptr, &temp, remaining);
> +			break;
> +		}
> +		dest_ptr++;
> +	}
> +
> +	return;
> +}
> +
> +/*
> + * Copy @bytes bytes from @src into the write FIFO window at @dest_ahb_addr.
> + *
> + * Every access is one 32-bit writel() to the same destination address; the
> + * destination pointer is intentionally never advanced. Each word is staged
> + * through a local variable with memcpy(), so @src does not have to be word
> + * aligned, and a trailing partial word is padded with 0xFF bytes instead of
> + * whatever happened to be on the stack.
> + */
> +static void cadence_qspi_apb_write_fifo_data(const void *dest_ahb_addr,
> +	const void *src, unsigned int bytes)
> +{
> +	unsigned int word;
> +	unsigned int chunk;
> +	unsigned int remaining = bytes;
> +	unsigned int *dest_ptr = (unsigned int *)dest_ahb_addr;
> +	const unsigned char *src_ptr = src;
> +
> +	while (remaining > 0) {
> +		/* One writel() carries at most sizeof(word) payload bytes. */
> +		chunk = remaining < sizeof(word) ? remaining : sizeof(word);
> +
> +		/* Pre-fill so that dangling bytes are deterministic. */
> +		word = 0xFFFFFFFF;
> +		memcpy(&word, src_ptr, chunk);
> +		writel(word, dest_ptr);
> +
> +		src_ptr += chunk;
> +		remaining -= chunk;
> +	}
> +}
> +
> +/*
> + * Read @num_bytes bytes from the SRAM FIFO window at @src_addr into
> + * @dest_addr, polling the read fill level before every burst.
> + *
> + * Returns 0 on success, or -1 when the fill level is still zero after
> + * CQSPI_REG_RETRY polls spaced 1 us apart.
> + */
> +static int qspi_read_sram_fifo_poll(void *dest_addr,
> +			const void *src_addr,  unsigned int num_bytes)
> +{
> +	unsigned int remaining = num_bytes;
> +	unsigned int retry;
> +	unsigned int sram_level = 0;
> +	unsigned char *dest = (unsigned char *)dest_addr;
> +
> +	while (remaining > 0) {
> +		/*
> +		 * Count down in the for-clause so that "retry == 0" means
> +		 * exactly "every poll failed": a break on the last attempt
> +		 * leaves retry at 1, and exhaustion cannot wrap the unsigned
> +		 * counter around to a non-zero value.
> +		 */
> +		for (retry = CQSPI_REG_RETRY; retry > 0; retry--) {
> +			sram_level = (readl(&cadence_qspi_base->sramfill) >>
> +				CQSPI_REG_SRAMLEVEL_RD_LSB) &
> +				CQSPI_REG_SRAMLEVEL_RD_MASK;
> +			if (sram_level)
> +				break;
> +			udelay(1);
> +		}
> +
> +		if (!retry) {
> +			printf("QSPI: No receive data after polling for %d "
> +				"times\n", CQSPI_REG_RETRY);
> +			return -1;
> +		}
> +
> +		/* The fill level is in FIFO words; convert it to bytes. */
> +		sram_level *= CQSPI_FIFO_WIDTH;
> +		sram_level = sram_level > remaining ? remaining : sram_level;
> +
> +		/* Read data from FIFO. */
> +		cadence_qspi_apb_read_fifo_data(dest, src_addr, sram_level);
> +		dest += sram_level;
> +		remaining -= sram_level;
> +		udelay(1);
> +	}
> +	return 0;
> +}
> +
> +
> +/*
> + * Write @num_bytes bytes from @src_addr to the SRAM FIFO window at
> + * @dest_addr, at most one flash page (CONFIG_CQSPI_PAGE_SIZE) per burst.
> + *
> + * Before each burst the write fill level is polled until it has dropped to
> + * CQSPI_REG_SRAM_THRESHOLD_WORDS or below. Returns 0 on success, or -1 when
> + * the level does not drop within CQSPI_REG_RETRY polls.
> + */
> +static int qpsi_write_sram_fifo_push(void *dest_addr,
> +				const void *src_addr, unsigned int num_bytes)
> +{
> +	unsigned int retry;
> +	unsigned int sram_level = 0;
> +	unsigned int wr_bytes;
> +	unsigned char *src = (unsigned char *)src_addr;
> +	unsigned int remaining = num_bytes;
> +	unsigned int page_size = CONFIG_CQSPI_PAGE_SIZE;
> +	unsigned int sram_threshold_words = CQSPI_REG_SRAM_THRESHOLD_WORDS;
> +
> +	while (remaining > 0) {
> +		/*
> +		 * retry reaches 0 only when every poll failed; a break on
> +		 * the final attempt still leaves it non-zero.
> +		 */
> +		for (retry = CQSPI_REG_RETRY; retry > 0; retry--) {
> +			sram_level = CQSPI_GET_WR_SRAM_LEVEL();
> +			if (sram_level <= sram_threshold_words)
> +				break;
> +			/* Same poll spacing as the read path. */
> +			udelay(1);
> +		}
> +		if (!retry) {
> +			printf("QSPI: SRAM fill level (0x%08x) "
> +				"not hit lower expected level (0x%08x)\n",
> +				sram_level, sram_threshold_words);
> +			return -1;
> +		}
> +		/* Write a page or remaining bytes. */
> +		wr_bytes = (remaining > page_size) ?
> +					page_size : remaining;
> +
> +		cadence_qspi_apb_write_fifo_data(dest_addr, src, wr_bytes);
> +		src += wr_bytes;
> +		remaining -= wr_bytes;
> +	}
> +
> +	return 0;
> +}
> +
> +static void cadence_qspi_apb_controller_enable(void)
> +{
> +	setbits_le32(&cadence_qspi_base->cfg, CQSPI_REG_CONFIG_ENABLE_MASK);
> +}
> +
> +static void cadence_qspi_apb_controller_disable(void)
> +{
> +	clrbits_le32(&cadence_qspi_base->cfg, CQSPI_REG_CONFIG_ENABLE_MASK);
> +}
> +
> +/*
> + * Wait for the controller to report idle.
> + *
> + * The idle bit in the cfg register must read back set for
> + * CQSPI_POLL_IDLE_RETRY consecutive polls before the controller is treated
> + * as idle; any busy reading restarts the count. The wait is bounded by a
> + * 5000 ms timer.
> + *
> + * Return 1 if idle, otherwise return 0 (still busy at timeout).
> + */
> +static unsigned int cadence_qspi_wait_idle(void)
> +{
> +	unsigned int start, count = 0;
> +	/* timeout in unit of ms */
> +	unsigned int timeout = 5000;
> +
> +	start = get_timer(0);
> +	for ( ; get_timer(start) < timeout ; ) {
> +		if ((readl(&cadence_qspi_base->cfg) >>
> +			CQSPI_REG_CONFIG_IDLE_LSB) & 0x1)
> +			count++;
> +		else
> +			count = 0;
> +		/*
> +		 * Ensure the QSPI controller is in true idle state after
> +		 * reading back the same idle status consecutively
> +		 */
> +		if (count >= CQSPI_POLL_IDLE_RETRY)
> +			return 1;
> +	}
> +
> +	/* Timeout, still in busy mode: report the bound actually used. */
> +	printf("QSPI: QSPI is still busy after %u ms timeout.\n", timeout);
> +	return 0;
> +}
> +
> +static void cadence_qspi_apb_readdata_capture(unsigned int bypass,
> +	unsigned int delay)
> +{
> +	unsigned int reg;
> +	cadence_qspi_apb_controller_disable();
> +
> +	reg = readl(&cadence_qspi_base->rddatacap);
> +
> +	if (bypass)
> +		reg |= (1 << CQSPI_REG_RD_DATA_CAPTURE_BYPASS_LSB);
> +	else
> +		reg &= ~(1 << CQSPI_REG_RD_DATA_CAPTURE_BYPASS_LSB);
> +
> +	reg &= ~(CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK
> +		<< CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB);
> +
> +	reg |= ((delay & CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK)
> +		<< CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB);
> +
> +	writel(reg, &cadence_qspi_base->rddatacap);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}
> +
> +/*
> + * Program the baud rate divisor field of the cfg register.
> + *
> + * @ref_clk_hz: reference clock fed to the controller
> + * @sclk_hz:    requested serial clock
> + *
> + * The ratio ref_clk_hz / sclk_hz is capped at 32 and then mapped to the
> + * register field: an even ratio R becomes R / 2 - 1, an odd ratio becomes
> + * R / 2. A request of 0 Hz selects the largest ratio, and a ratio below 2
> + * (requested clock above half the reference) selects field value 0 instead
> + * of underflowing. The controller is disabled while the field is rewritten
> + * and enabled again afterwards.
> + */
> +static void cadence_qspi_apb_config_baudrate_div(unsigned int ref_clk_hz,
> +	unsigned int sclk_hz)
> +{
> +	unsigned int reg;
> +	unsigned int div;
> +
> +	cadence_qspi_apb_controller_disable();
> +	reg = readl(&cadence_qspi_base->cfg);
> +	reg &= ~(CQSPI_REG_CONFIG_BAUD_MASK << CQSPI_REG_CONFIG_BAUD_LSB);
> +
> +	/* Avoid a division by zero; fall back to the slowest setting. */
> +	if (sclk_hz == 0)
> +		div = 32;
> +	else
> +		div = ref_clk_hz / sclk_hz;
> +
> +	if (div > 32)
> +		div = 32;
> +
> +	if (div < 2)
> +		/* (0 / 2) - 1 would wrap around to the slowest divider. */
> +		div = 0;
> +	else if ((div & 1))
> +		/* Odd ratio. */
> +		div = (div / 2);
> +	else
> +		/* Even ratio. */
> +		div = (div / 2) - 1;
> +
> +	debug("%s: ref_clk %dHz sclk %dHz Div 0x%x\n", __func__,
> +		ref_clk_hz, sclk_hz, div);
> +
> +	div = (div & CQSPI_REG_CONFIG_BAUD_MASK) << CQSPI_REG_CONFIG_BAUD_LSB;
> +	reg |= div;
> +	writel(reg, &cadence_qspi_base->cfg);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}
> +
> +/*
> + * Set the SPI clock polarity and phase bits in the cfg register.
> + *
> + * Only bit 0 of @clk_pol and @clk_pha is used. The controller is disabled
> + * while the register is rewritten and enabled again afterwards.
> + */
> +static void cadence_qspi_apb_set_clk_mode(unsigned int clk_pol,
> +	unsigned int clk_pha)
> +{
> +	unsigned int reg;
> +
> +	cadence_qspi_apb_controller_disable();
> +	reg = readl(&cadence_qspi_base->cfg);
> +
> +	/*
> +	 * Clear both mode bits. The two masks have to be OR-ed together;
> +	 * OR-ing the bit positions and shifting once clears an unrelated
> +	 * bit (or only one of the two) and leaves stale mode bits behind.
> +	 */
> +	reg &= ~((1 << CQSPI_REG_CONFIG_CLK_POL_LSB) |
> +		 (1 << CQSPI_REG_CONFIG_CLK_PHA_LSB));
> +
> +	reg |= ((clk_pol & 0x1) << CQSPI_REG_CONFIG_CLK_POL_LSB);
> +	reg |= ((clk_pha & 0x1) << CQSPI_REG_CONFIG_CLK_PHA_LSB);
> +
> +	writel(reg, &cadence_qspi_base->cfg);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}
> +
> +static void cadence_qspi_apb_chipselect(unsigned int chip_select,
> +	unsigned int decoder_enable)
> +{
> +	unsigned int reg;
> +
> +	cadence_qspi_apb_controller_disable();
> +
> +	debug("%s : chipselect %d decode %d\n", __func__, chip_select,
> +		decoder_enable);
> +
> +	reg = readl(&cadence_qspi_base->cfg);
> +	/* decoder */
> +	if (decoder_enable)
> +		reg |= CQSPI_REG_CONFIG_DECODE_MASK;
> +	else {
> +		reg &= ~CQSPI_REG_CONFIG_DECODE_MASK;
> +		/* Convert CS if without decoder.
> +		 * CS0 to 4b'1110
> +		 * CS1 to 4b'1101
> +		 * CS2 to 4b'1011
> +		 * CS3 to 4b'0111
> +		 */
> +		chip_select = 0xF & ~(1 << chip_select);
> +	}
> +
> +	reg &= ~(CQSPI_REG_CONFIG_CHIPSELECT_MASK
> +			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB);
> +	reg |= (chip_select & CQSPI_REG_CONFIG_CHIPSELECT_MASK)
> +			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB;
> +	writel(reg, &cadence_qspi_base->cfg);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}
> +
> +/*
> + * Program the four chip-select timing fields of the delay register.
> + *
> + * @ref_clk:  reference clock in Hz
> + * @sclk_hz:  serial clock in Hz
> + * @tshsl_ns, @tsd2d_ns, @tchsh_ns, @tslch_ns: required delays in ns
> + *
> + * Each delay is reduced by one SCLK period, converted to reference clock
> + * cycles and rounded up by one cycle. A delay shorter than one SCLK period
> + * is clamped to that period first, so the unsigned subtraction inside
> + * CQSPI_CAL_DELAY() cannot wrap around. Nothing is changed when either
> + * clock is zero.
> + */
> +static void cadence_qspi_apb_delay(unsigned int ref_clk, unsigned int sclk_hz,
> +	unsigned int tshsl_ns, unsigned int tsd2d_ns,
> +	unsigned int tchsh_ns, unsigned int tslch_ns)
> +{
> +	unsigned int ref_clk_ns;
> +	unsigned int sclk_ns;
> +	unsigned int tshsl, tchsh, tslch, tsd2d;
> +	unsigned int reg;
> +
> +	/* Both clocks are used as divisors below. */
> +	if (!ref_clk || !sclk_hz) {
> +		printf("QSPI: Invalid clock, ref_clk %d sclk %d\n",
> +			ref_clk, sclk_hz);
> +		return;
> +	}
> +
> +	cadence_qspi_apb_controller_disable();
> +
> +	/* Convert to ns; keep the divisor non-zero above 1 GHz. */
> +	ref_clk_ns = (1000000000) / ref_clk;
> +	if (!ref_clk_ns)
> +		ref_clk_ns = 1;
> +
> +	/* Convert to ns. */
> +	sclk_ns = (1000000000) / sclk_hz;
> +
> +	/* Clamp so that (tdelay_ns - tsclk_ns) never goes below zero. */
> +	if (tshsl_ns < sclk_ns)
> +		tshsl_ns = sclk_ns;
> +	if (tchsh_ns < sclk_ns)
> +		tchsh_ns = sclk_ns;
> +	if (tslch_ns < sclk_ns)
> +		tslch_ns = sclk_ns;
> +	if (tsd2d_ns < sclk_ns)
> +		tsd2d_ns = sclk_ns;
> +
> +	/* Plus 1 to round up 1 clock cycle. */
> +	tshsl = CQSPI_CAL_DELAY(tshsl_ns, ref_clk_ns, sclk_ns) + 1;
> +	tchsh = CQSPI_CAL_DELAY(tchsh_ns, ref_clk_ns, sclk_ns) + 1;
> +	tslch = CQSPI_CAL_DELAY(tslch_ns, ref_clk_ns, sclk_ns) + 1;
> +	tsd2d = CQSPI_CAL_DELAY(tsd2d_ns, ref_clk_ns, sclk_ns) + 1;
> +
> +	reg = ((tshsl & CQSPI_REG_DELAY_TSHSL_MASK)
> +			<< CQSPI_REG_DELAY_TSHSL_LSB);
> +	reg |= ((tchsh & CQSPI_REG_DELAY_TCHSH_MASK)
> +			<< CQSPI_REG_DELAY_TCHSH_LSB);
> +	reg |= ((tslch & CQSPI_REG_DELAY_TSLCH_MASK)
> +			<< CQSPI_REG_DELAY_TSLCH_LSB);
> +	reg |= ((tsd2d & CQSPI_REG_DELAY_TSD2D_MASK)
> +			<< CQSPI_REG_DELAY_TSD2D_LSB);
> +	writel(reg, &cadence_qspi_base->delay);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}
> +
> +static void cadence_qspi_apb_controller_init(void)
> +{
> +	unsigned reg;
> +
> +	cadence_qspi_apb_controller_disable();
> +
> +	/* Configure the device size and address bytes */
> +	reg = readl(&cadence_qspi_base->devsz);
> +	/* Clear the previous value */
> +	reg &= ~(CQSPI_REG_SIZE_PAGE_MASK << CQSPI_REG_SIZE_PAGE_LSB);
> +	reg &= ~(CQSPI_REG_SIZE_BLOCK_MASK << CQSPI_REG_SIZE_BLOCK_LSB);
> +	reg |= (CONFIG_CQSPI_PAGE_SIZE << CQSPI_REG_SIZE_PAGE_LSB);
> +	reg |= (CONFIG_CQSPI_BLOCK_SIZE << CQSPI_REG_SIZE_BLOCK_LSB);
> +	writel(reg, &cadence_qspi_base->devsz);
> +
> +	/* Configure the remap address register, no remap */
> +	writel(0, &cadence_qspi_base->remapaddr);
> +
> +	/* Disable all interrupts */
> +	writel(0, &cadence_qspi_base->irqmask);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}
> +
> +/*
> + * Execute a command through the flashcmd register.
> + *
> + * @reg: flashcmd value without the execute bit; it is written once as-is
> + *       and once more with CQSPI_REG_CMDCTRL_EXECUTE_MASK set.
> + *
> + * Polls the in-progress bit up to CQSPI_REG_RETRY times (1 us apart) and
> + * then waits for the controller to go idle. Returns 0 on success or -EIO
> + * on either timeout.
> + */
> +static int cadence_qspi_apb_exec_flash_cmd(unsigned int reg)
> +{
> +	unsigned int retry;
> +
> +	/* Write the CMDCTRL without start execution. */
> +	writel(reg, &cadence_qspi_base->flashcmd);
> +	/* Start execute */
> +	reg |= CQSPI_REG_CMDCTRL_EXECUTE_MASK;
> +	writel(reg, &cadence_qspi_base->flashcmd);
> +
> +	/*
> +	 * retry reaches 0 only when every poll saw the command still in
> +	 * progress; completing on the last attempt leaves it non-zero and
> +	 * exhaustion cannot wrap the unsigned counter.
> +	 */
> +	for (retry = CQSPI_REG_RETRY; retry > 0; retry--) {
> +		reg = readl(&cadence_qspi_base->flashcmd);
> +		if ((reg & CQSPI_REG_CMDCTRL_INPROGRESS_MASK) == 0)
> +			break;
> +		udelay(1);
> +	}
> +
> +	if (!retry) {
> +		printf("QSPI: flash command execution timeout\n");
> +		return -EIO;
> +	}
> +
> +	/* Polling QSPI idle status. */
> +	if (!cadence_qspi_wait_idle())
> +		return -EIO;
> +
> +	return 0;
> +}
> +
> +/*
> + * Issue an opcode-only command and read back its response.
> + * For command RDID, RDSR.
> + *
> + * @cmdlen: length of @cmdbuf, must be non-zero; only cmdbuf[0] is sent
> + * @cmdbuf: command bytes, must not be NULL
> + * @rxlen:  number of bytes to read, 1..CQSPI_STIG_DATA_LEN_MAX
> + * @rxbuf:  destination buffer, must not be NULL
> + *
> + * Returns 0 on success, -EINVAL on bad arguments, or the error returned by
> + * cadence_qspi_apb_exec_flash_cmd().
> + */
> +static int cadence_qspi_apb_command_read(unsigned int cmdlen, const u8 *cmdbuf,
> +	unsigned int rxlen, u8 *rxbuf)
> +{
> +	unsigned int reg;
> +	unsigned int read_len;
> +	int status;
> +
> +	/*
> +	 * rxlen == 0 is rejected as well: the length field below is
> +	 * encoded as (rxlen - 1), which would wrap around for zero.
> +	 */
> +	if (!cmdlen || cmdbuf == NULL || !rxlen ||
> +	    rxlen > CQSPI_STIG_DATA_LEN_MAX || rxbuf == NULL) {
> +		printf("QSPI: Invalid input arguments cmdlen %d "
> +			"rxlen %d\n", cmdlen, rxlen);
> +		return -EINVAL;
> +	}
> +
> +	reg = cmdbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB;
> +
> +	reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB);
> +
> +	/* 0 means 1 byte. */
> +	reg |= (((rxlen - 1) & CQSPI_REG_CMDCTRL_RD_BYTES_MASK)
> +		<< CQSPI_REG_CMDCTRL_RD_BYTES_LSB);
> +	status = cadence_qspi_apb_exec_flash_cmd(reg);
> +	if (status != 0)
> +		return status;
> +
> +	reg = readl(&cadence_qspi_base->flashcmdrddatalo);
> +
> +	/* Put the read value into rx_buf */
> +	read_len = (rxlen > 4) ? 4 : rxlen;
> +	memcpy(rxbuf, &reg, read_len);
> +	rxbuf += read_len;
> +
> +	/* Bytes 5..8 come from the upper data register. */
> +	if (rxlen > 4) {
> +		reg = readl(&cadence_qspi_base->flashcmdrddataup);
> +
> +		read_len = rxlen - read_len;
> +		memcpy(rxbuf, &reg, read_len);
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Issue a command with an optional address and up to 8 data bytes.
> + * For commands: WRSR, WREN, WRDI, CHIP_ERASE, BE, etc.
> + *
> + * @cmdlen: 1..5 bytes; a length of 4 or 5 means opcode plus a 3- or 4-byte
> + *          address taken from cmdbuf[1..]
> + * @cmdbuf: command bytes, must not be NULL
> + * @txlen:  number of data bytes, 0..8
> + * @txbuf:  data bytes, must not be NULL when @txlen is non-zero
> + *
> + * Returns 0 on success, -EINVAL on bad arguments, or the error returned by
> + * cadence_qspi_apb_exec_flash_cmd().
> + */
> +static int cadence_qspi_apb_command_write(unsigned int cmdlen,
> +	const u8 *cmdbuf, unsigned int txlen,  const u8 *txbuf)
> +{
> +	unsigned int reg = 0;
> +	unsigned int addr_value;
> +	unsigned int wr_data;
> +	unsigned int wr_len;
> +
> +	if (!cmdlen || cmdlen > 5 || txlen > 8 || cmdbuf == NULL ||
> +	    (txlen && txbuf == NULL)) {
> +		printf("QSPI: Invalid input arguments cmdlen %d txlen %d\n",
> +			cmdlen, txlen);
> +		return -EINVAL;
> +	}
> +
> +	reg |= cmdbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB;
> +
> +	if (cmdlen == 4 || cmdlen == 5) {
> +		/* Command with address */
> +		reg |= (0x1 << CQSPI_REG_CMDCTRL_ADDR_EN_LSB);
> +		/* Address byte count, encoded as (count - 1). */
> +		reg |= ((cmdlen - 2) & CQSPI_REG_CMDCTRL_ADD_BYTES_MASK)
> +			<< CQSPI_REG_CMDCTRL_ADD_BYTES_LSB;
> +		/* Get address */
> +		addr_value = cadence_qspi_apb_cmd2addr(&cmdbuf[1],
> +			cmdlen >= 5 ? 4 : 3);
> +
> +		writel(addr_value, &cadence_qspi_base->flashcmdaddr);
> +	}
> +
> +	if (txlen) {
> +		/* writing data = yes */
> +		reg |= (0x1 << CQSPI_REG_CMDCTRL_WR_EN_LSB);
> +		reg |= ((txlen - 1) & CQSPI_REG_CMDCTRL_WR_BYTES_MASK)
> +			<< CQSPI_REG_CMDCTRL_WR_BYTES_LSB;
> +
> +		/* Zero first so unused bytes are not stack garbage. */
> +		wr_len = txlen > 4 ? 4 : txlen;
> +		wr_data = 0;
> +		memcpy(&wr_data, txbuf, wr_len);
> +		writel(wr_data, &cadence_qspi_base->flashcmdwrdatalo);
> +
> +		if (txlen > 4) {
> +			txbuf += wr_len;
> +			wr_len = txlen - wr_len;
> +			wr_data = 0;
> +			memcpy(&wr_data, txbuf, wr_len);
> +			writel(wr_data, &cadence_qspi_base->flashcmdwrdataup);
> +		}
> +	}
> +
> +	/* Execute the command */
> +	return cadence_qspi_apb_exec_flash_cmd(reg);
> +}
> +
> +/* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
> +static int cadence_qspi_apb_indirect_read_setup(unsigned int ahb_phy_addr,
> +	unsigned int cmdlen, const u8 *cmdbuf)
> +{
> +	unsigned int reg;
> +	unsigned int rd_reg;
> +	unsigned int addr_value;
> +	unsigned int dummy_clk;
> +	unsigned int dummy_bytes;
> +	unsigned int addr_bytes;
> +
> +	/*
> +	 * Identify addr_byte. All NOR flash device drivers are using fast read
> +	 * which always expecting 1 dummy byte, 1 cmd byte and 3/4 addr byte.
> +	 * With that, the length is in value of 5 or 6. Only FRAM chip from
> +	 * ramtron using normal read (which won't need dummy byte).
> +	 * Unlikely NOR flash using normal read due to performance issue.
> +	 */
> +	if (cmdlen >= 5)
> +		/* to cater fast read where cmd + addr + dummy */
> +		addr_bytes = cmdlen - 2;
> +	else
> +		/* for normal read (only ramtron as of now) */
> +		addr_bytes = cmdlen - 1;
> +
> +	/* Setup the indirect trigger address */
> +	writel((ahb_phy_addr & CQSPI_INDIRECTTRIGGER_ADDR_MASK),
> +		&cadence_qspi_base->indaddrtrig);
> +
> +	/* Configure SRAM partition for read. */
> +	writel(CQSPI_REG_SRAM_PARTITION_RD, &cadence_qspi_base->srampart);
> +
> +	/* Configure the opcode */
> +	rd_reg = cmdbuf[0] << CQSPI_REG_RD_INSTR_OPCODE_LSB;
> +
> +#if (CONFIG_SPI_FLASH_QUAD == 1)
> +	/* Instruction and address at DQ0, data at DQ0-3. */
> +	rd_reg |= CQSPI_INST_TYPE_QUAD << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
> +#endif
> +
> +	/* Get address */
> +	addr_value = cadence_qspi_apb_cmd2addr(&cmdbuf[1], addr_bytes);
> +	writel(addr_value, &cadence_qspi_base->indrdstaddr);
> +
> +	/* The remaining length is dummy bytes. */
> +	dummy_bytes = cmdlen - addr_bytes - 1;
> +	if (dummy_bytes) {
> +
> +		if (dummy_bytes > CQSPI_DUMMY_BYTES_MAX)
> +			dummy_bytes = CQSPI_DUMMY_BYTES_MAX;
> +
> +		rd_reg |= (1 << CQSPI_REG_RD_INSTR_MODE_EN_LSB);
> +#if defined(CONFIG_SPL_SPI_XIP) && defined(CONFIG_SPL_BUILD)
> +		writel(0x0, &cadence_qspi_base->modebit);
> +#else
> +		writel(0xFF, &cadence_qspi_base->modebit);
> +#endif
> +
> +		/* Convert to clock cycles. */
> +		dummy_clk = dummy_bytes * CQSPI_DUMMY_CLKS_PER_BYTE;
> +		/* Need to minus the mode byte (8 clocks). */
> +		dummy_clk -= CQSPI_DUMMY_CLKS_PER_BYTE;
> +
> +		if (dummy_clk)
> +			rd_reg |= (dummy_clk & CQSPI_REG_RD_INSTR_DUMMY_MASK)
> +				<< CQSPI_REG_RD_INSTR_DUMMY_LSB;
> +	}
> +
> +	writel(rd_reg, &cadence_qspi_base->devrd);
> +
> +	/* set device size */
> +	reg = readl(&cadence_qspi_base->devsz);
> +	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
> +	reg |= (addr_bytes - 1);
> +	writel(reg, &cadence_qspi_base->devsz);
> +	return 0;
> +}
> +
> +static int cadence_qspi_apb_indirect_read_execute(void *ahb_base_addr,
> +	unsigned int rxlen, u8 *rxbuf)
> +{
> +	unsigned int reg;
> +
> +	writel(rxlen, &cadence_qspi_base->indrdcnt);
> +
> +	/* Start the indirect read transfer */
> +	writel(CQSPI_REG_INDIRECTRD_START_MASK,
> +			&cadence_qspi_base->indrd);
> +
> +	if (qspi_read_sram_fifo_poll((void *)rxbuf,
> +				(const void *)ahb_base_addr, rxlen)) {
> +		goto failrd;
> +	}
> +
> +	/* Check flash indirect controller */
> +	reg = readl(&cadence_qspi_base->indrd);
> +	if (!(reg & CQSPI_REG_INDIRECTRD_DONE_MASK)) {
> +		reg = readl(&cadence_qspi_base->indrd);
> +		printf("QSPI: indirect completion status "
> +			"error with reg 0x%08x\n", reg);
> +		goto failrd;
> +	}
> +
> +	/* Clear indirect completion status */
> +	writel(CQSPI_REG_INDIRECTRD_DONE_MASK, &cadence_qspi_base->indrd);
> +	return 0;
> +
> +failrd:
> +	/* Cancel the indirect read */
> +	writel(CQSPI_REG_INDIRECTRD_CANCEL_MASK, &cadence_qspi_base->indrd);
> +	return -1;
> +}
> +
> +/*
> + * Prepare the controller for an indirect write.
> + * Opcode + Address (3/4 bytes)
> + *
> + * @ahb_phy_addr: address programmed (masked) as the indirect trigger
> + * @cmdlen:       length of @cmdbuf, at least 4; more than 4 selects a
> + *                4-byte address, otherwise a 3-byte address is used
> + * @cmdbuf:       opcode followed by the address bytes, must not be NULL
> + *
> + * Returns 0 on success or -EINVAL on bad arguments.
> + */
> +static int cadence_qspi_apb_indirect_write_setup(unsigned int ahb_phy_addr,
> +	unsigned int cmdlen, const u8 *cmdbuf)
> +{
> +	unsigned int reg;
> +	unsigned int addr_bytes;
> +
> +	if (cmdlen < 4 || cmdbuf == NULL) {
> +		printf("QSPI: Invalid input argument, len %d cmdbuf 0x%08x\n",
> +			cmdlen, (unsigned int)cmdbuf);
> +		return -EINVAL;
> +	}
> +
> +	addr_bytes = cmdlen > 4 ? 4 : 3;
> +
> +	/* Setup the indirect trigger address */
> +	writel((ahb_phy_addr & CQSPI_INDIRECTTRIGGER_ADDR_MASK),
> +		&cadence_qspi_base->indaddrtrig);
> +
> +	/* Configure SRAM partition for write. */
> +	writel(CQSPI_REG_SRAM_PARTITION_WR,
> +		&cadence_qspi_base->srampart);
> +
> +	/* Configure the opcode */
> +	reg = cmdbuf[0] << CQSPI_REG_WR_INSTR_OPCODE_LSB;
> +	writel(reg, &cadence_qspi_base->devwr);
> +
> +	/* Setup write address. */
> +	reg = cadence_qspi_apb_cmd2addr(&cmdbuf[1], addr_bytes);
> +	writel(reg, &cadence_qspi_base->indwrstaddr);
> +
> +	/* Address size field is encoded as (bytes - 1). */
> +	reg = readl(&cadence_qspi_base->devsz);
> +	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
> +	reg |= (addr_bytes - 1);
> +	writel(reg, &cadence_qspi_base->devsz);
> +	return 0;
> +}
> +
> +static int cadence_qspi_apb_indirect_write_execute(void *ahb_base_addr,
> +	unsigned int txlen, const u8 *txbuf)
> +{
> +	unsigned int reg = 0;
> +	unsigned int retry;
> +
> +	/* Configure the indirect write transfer bytes */
> +	writel(txlen, &cadence_qspi_base->indwrcnt);
> +
> +	/* Start the indirect write transfer */
> +	writel(CQSPI_REG_INDIRECTWR_START_MASK,	&cadence_qspi_base->indwr);
> +
> +	if (qpsi_write_sram_fifo_push(ahb_base_addr,
> +		(const void *)txbuf, txlen)) {
> +		goto failwr;
> +	}
> +
> +	/* Wait until last write is completed (FIFO empty) */
> +	retry = CQSPI_REG_RETRY;
> +	while (retry--) {
> +		reg = CQSPI_GET_WR_SRAM_LEVEL();
> +		if (reg == 0)
> +			break;
> +
> +		udelay(1);
> +	}
> +	if (reg != 0) {
> +		printf("QSPI: timeout for indirect write\n");
> +		goto failwr;
> +	}
> +
> +	/* Check flash indirect controller status */
> +	retry = CQSPI_REG_RETRY;
> +	while (retry--) {
> +		reg = readl(&cadence_qspi_base->indwr);
> +		if (reg & CQSPI_REG_INDIRECTWR_DONE_MASK)
> +			break;
> +		udelay(1);
> +	}
> +	if (!(reg & CQSPI_REG_INDIRECTWR_DONE_MASK)) {
> +		printf("QSPI: indirect completion "
> +			"status error with reg 0x%08x\n", reg);
> +		goto failwr;
> +	}
> +
> +	/* Clear indirect completion status */
> +	writel(CQSPI_REG_INDIRECTWR_DONE_MASK, &cadence_qspi_base->indwr);
> +	return 0;
> +
> +failwr:
> +	/* Cancel the indirect write */
> +	writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK, &cadence_qspi_base->indwr);
> +	return -1;
> +}
> +
> +static void cadence_qspi_apb_enter_xip(char xip_dummy)
> +{
> +	unsigned int reg;
> +
> +	/* enter XiP mode immediately and enable direct mode */
> +	reg = readl(&cadence_qspi_base->cfg);
> +	reg |= CQSPI_REG_CONFIG_ENABLE_MASK;
> +	reg |= CQSPI_REG_CONFIG_DIRECT_MASK;
> +	reg |= CQSPI_REG_CONFIG_XIP_IMM_MASK;
> +	writel(reg, &cadence_qspi_base->cfg);
> +
> +	/* keep the XiP mode */
> +	writel(xip_dummy, &cadence_qspi_base->modebit);
> +
> +	/* Enable mode bit at devrd */
> +	reg = readl(&cadence_qspi_base->devrd);
> +	reg |= (1 << CQSPI_REG_RD_INSTR_MODE_EN_LSB);
> +	writel(reg, &cadence_qspi_base->devrd);
> +}
> +
> +void spi_set_speed(struct spi_slave *slave, uint hz)
> +{
> +	cadence_qspi_apb_config_baudrate_div(CONFIG_CQSPI_REF_CLK, hz);
> +
> +	/* Reconfigure delay timing if speed is changed. */
> +	cadence_qspi_apb_delay(CONFIG_CQSPI_REF_CLK, hz,
> +		CONFIG_CQSPI_TSHSL_NS, CONFIG_CQSPI_TSD2D_NS,
> +		CONFIG_CQSPI_TCHSH_NS, CONFIG_CQSPI_TSLCH_NS);
> +	return;
> +}
> +
> +/*
> + * Calibration sequence to determine the read data capture delay register.
> + *
> + * The flash ID is first read at 1 MHz and kept as the reference value. The
> + * bus is then switched to the slave's max_hz and the ID is read again for
> + * every capture delay from 0 to CQSPI_READ_CAPTURE_MAX_DELAY - 1. The first
> + * contiguous run of delays that return the reference ID is recorded as
> + * [range_lo, range_hi] and its midpoint is programmed.
> + *
> + * Returns 0 on success, the error from the ID read if a read fails, or
> + * -EIO when no delay value reproduces the reference ID.
> + */
> +int spi_calibration(struct spi_slave *slave)
> +{
> +	struct cadence_qspi_slave *cadence_qspi = to_cadence_qspi_slave(slave);
> +	u8 opcode_rdid = 0x9F;
> +	unsigned int idcode = 0, temp = 0;
> +	int err = 0, i, range_lo = -1, range_hi = -1;
> +
> +	/* start with slowest clock (1 MHz) */
> +	spi_set_speed(slave, 1000000);
> +
> +	/* configure the read data capture delay register to 0 */
> +	cadence_qspi_apb_readdata_capture(1, 0);
> +
> +	/* Enable QSPI */
> +	cadence_qspi_apb_controller_enable();
> +
> +	/* read the ID which will be our golden value */
> +	err = cadence_qspi_apb_command_read(1, &opcode_rdid,
> +		3, (u8 *)&idcode);
> +	if (err) {
> +		puts("SF: Calibration failed (read)\n");
> +		return err;
> +	}
> +
> +	/* use back the intended clock and find low range */
> +	spi_set_speed(slave, cadence_qspi->max_hz);
> +	for (i = 0; i < CQSPI_READ_CAPTURE_MAX_DELAY; i++) {
> +		/* Disable QSPI */
> +		cadence_qspi_apb_controller_disable();
> +
> +		/* reconfigure the read data capture delay register */
> +		cadence_qspi_apb_readdata_capture(1, i);
> +
> +		/* Enable back QSPI */
> +		cadence_qspi_apb_controller_enable();
> +
> +		/* issue a RDID to get the ID value */
> +		err = cadence_qspi_apb_command_read(1, &opcode_rdid,
> +			3, (u8 *)&temp);
> +		if (err) {
> +			puts("SF: Calibration failed (read)\n");
> +			return err;
> +		}
> +
> +		if (temp == idcode) {
> +			/*
> +			 * A working delay: open the window on the first
> +			 * hit and extend its upper bound on every hit, so
> +			 * range_hi is valid even when the only working
> +			 * delay is the last one tried.
> +			 */
> +			if (range_lo == -1)
> +				range_lo = i;
> +			range_hi = i;
> +		} else if (range_lo != -1) {
> +			/* first failure after the window: it is closed */
> +			break;
> +		}
> +	}
> +
> +	if (range_lo == -1) {
> +		puts("SF: Calibration failed (low range)\n");
> +		/* err is 0 here; report the failure explicitly. */
> +		return -EIO;
> +	}
> +
> +	/* Disable QSPI for subsequent initialization */
> +	cadence_qspi_apb_controller_disable();
> +
> +	/* configure the final value for read data capture delay register */
> +	cadence_qspi_apb_readdata_capture(1, (range_hi + range_lo) / 2);
> +	printf("SF: Read data capture delay calibrated to %i (%i - %i)\n",
> +		(range_hi + range_lo) / 2, range_lo, range_hi);
> +
> +	/* just to ensure we do once only when speed or chip select change */
> +	qspi_calibrated_hz = cadence_qspi->max_hz;
> +	qspi_calibrated_cs = slave->cs;
> +	return 0;
> +}
> +
> +int spi_cs_is_valid(unsigned int bus, unsigned int cs)
> +{
> +#if (CONFIG_CQSPI_DECODER == 1)
> +	if (((cs >= 0) && (cs < CQSPI_DECODER_MAX_CS)) && ((bus >= 0) &&
> +		(bus < CQSPI_DECODER_MAX_CS))) {
> +		return 1;
> +	}
> +#else
> +	if (((cs >= 0) && (cs < CQSPI_NO_DECODER_MAX_CS)) &&
> +		((bus >= 0) && (bus < CQSPI_NO_DECODER_MAX_CS))) {
> +		return 1;
> +	}
> +#endif
> +	printf("QSPI: Invalid bus or cs. Bus %d cs %d\n", bus, cs);
> +	return 0;
> +}
> +
> +void spi_cs_activate(struct spi_slave *slave)
> +{
> +	return;
> +}
> +
> +void spi_cs_deactivate(struct spi_slave *slave)
> +{
> +	return;
> +}
> +
> +void spi_init(void)
> +{
> +	cadence_qspi_apb_controller_init();
> +	qspi_is_init = 1;
> +	return;
> +}
> +
> +/*
> + * Allocate and fill in a slave descriptor for @bus / @cs.
> + *
> + * @max_hz and @mode are only stored here; they are applied to the
> + * controller later, in spi_claim_bus(). The controller is initialised on
> + * the first call. Returns the embedded struct spi_slave, or NULL when
> + * @bus / @cs is rejected by spi_cs_is_valid() or the allocation fails.
> + * The result is released with spi_free_slave().
> + */
> +struct spi_slave *spi_setup_slave(unsigned int bus, unsigned int cs,
> +		unsigned int max_hz, unsigned int mode)
> +{
> +	struct cadence_qspi_slave *cadence_qspi;
> +
> +	debug("%s: bus %d cs %d max_hz %dMHz mode %d\n", __func__,
> +		bus, cs, max_hz/1000000, mode);
> +
> +	if (!spi_cs_is_valid(bus, cs))
> +		return NULL;
> +
> +	cadence_qspi = malloc(sizeof(*cadence_qspi));
> +	if (!cadence_qspi) {
> +		printf("QSPI: Can't allocate struct cadence_qspi_slave. "
> +			"Bus %d cs %d\n", bus, cs);
> +		return NULL;
> +	}
> +
> +	/*
> +	 * malloc() memory is not zeroed. spi_xfer() reads cmd_len before
> +	 * anything has written it when a transfer arrives without
> +	 * SPI_XFER_BEGIN, so start from a fully cleared descriptor.
> +	 */
> +	memset(cadence_qspi, 0, sizeof(*cadence_qspi));
> +
> +	cadence_qspi->slave.bus = bus;
> +	cadence_qspi->slave.cs = cs;
> +	cadence_qspi->mode = mode;
> +	cadence_qspi->max_hz = max_hz;
> +	cadence_qspi->regbase = (void *)QSPI_BASE;
> +	cadence_qspi->ahbbase = (void *)QSPI_AHB_BASE;
> +
> +	if (!qspi_is_init)
> +		spi_init();
> +
> +	return &cadence_qspi->slave;
> +}
> +
> +void spi_free_slave(struct spi_slave *slave)
> +{
> +	struct cadence_qspi_slave *cadence_qspi = to_cadence_qspi_slave(slave);
> +	free(cadence_qspi);
> +	return;
> +}
> +
> +int spi_claim_bus(struct spi_slave *slave)
> +{
> +	struct cadence_qspi_slave *cadence_qspi = to_cadence_qspi_slave(slave);
> +	unsigned int clk_pol = (cadence_qspi->mode & SPI_CPOL) ? 1 : 0;
> +	unsigned int clk_pha = (cadence_qspi->mode & SPI_CPHA) ? 1 : 0;
> +	int err = 0;
> +
> +	debug("%s: bus:%i cs:%i\n", __func__, slave->bus, slave->cs);
> +
> +	/* Disable QSPI */
> +	cadence_qspi_apb_controller_disable();
> +
> +	/* Set Chip select */
> +	cadence_qspi_apb_chipselect(slave->cs, CONFIG_CQSPI_DECODER);
> +
> +	/* Set SPI mode */
> +	cadence_qspi_apb_set_clk_mode(clk_pol, clk_pha);
> +
> +	/* Set clock speed */
> +	spi_set_speed(slave, cadence_qspi->max_hz);
> +
> +	/* calibration required for different SCLK speed or chip select */
> +	if (qspi_calibrated_hz != cadence_qspi->max_hz ||
> +		qspi_calibrated_cs != slave->cs) {
> +		err = spi_calibration(slave);
> +		if (err)
> +			return err;
> +	}
> +
> +	/* Enable QSPI */
> +	cadence_qspi_apb_controller_enable();
> +
> +	return 0;
> +}
> +
> +void spi_release_bus(struct spi_slave *slave)
> +{
> +	return;
> +}
> +
> +/*
> + * Run one SPI transfer phase.
> + *
> + * A phase flagged SPI_XFER_BEGIN carries the command (opcode, address and
> + * dummy bytes); it is copied into the slave's cmd_buf. The flash operation
> + * itself is started on a phase flagged SPI_XFER_END (or with no flags),
> + * using the buffered command plus this phase's data:
> + *   - data_in with data and a 1-byte command     -> STIG read
> + *   - data_in with data and a longer command     -> indirect read
> + *   - data_out, not BEGIN, 1-byte command        -> STIG write
> + *   - data_out, not BEGIN, longer command        -> indirect write
> + *   - anything else (e.g. BEGIN | END)           -> STIG write, no data
> + *
> + * Returns 0 on success, -EINVAL for a command that is NULL or does not fit
> + * into cmd_buf, -1 for an empty command, or the error of the operation.
> + */
> +int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *data_out,
> +		void *data_in, unsigned long flags)
> +{
> +	struct cadence_qspi_slave *cadence_qspi = to_cadence_qspi_slave(slave);
> +	void *ahbbase = cadence_qspi->ahbbase;
> +	u8 *cmd_buf = cadence_qspi->cmd_buf;
> +	size_t data_bytes;
> +	int err = 0;
> +	u32 mode = CQSPI_STIG_WRITE;
> +
> +	if (flags & SPI_XFER_BEGIN) {
> +		size_t cmd_bytes = bitlen / 8;
> +
> +		/*
> +		 * Never copy more than cmd_buf can hold.
> +		 * NOTE(review): assumes cmd_buf is an array member, as the
> +		 * sizeof() in the clean-up below already does - confirm
> +		 * against the struct definition.
> +		 */
> +		if (cmd_bytes > sizeof(cadence_qspi->cmd_buf) ||
> +		    (cmd_bytes && data_out == NULL)) {
> +			printf("QSPI: Invalid command, len %d\n",
> +				(int)cmd_bytes);
> +			return -EINVAL;
> +		}
> +
> +		/* copy command to local buffer */
> +		cadence_qspi->cmd_len = cmd_bytes;
> +		if (cmd_bytes)
> +			memcpy(cmd_buf, data_out, cmd_bytes);
> +	}
> +
> +	if (flags == (SPI_XFER_BEGIN | SPI_XFER_END)) {
> +		/* if start and end bit are set, the data bytes is 0. */
> +		data_bytes = 0;
> +	} else {
> +		data_bytes = bitlen / 8;
> +	}
> +
> +	if ((flags & SPI_XFER_END) || (flags == 0)) {
> +		if (cadence_qspi->cmd_len == 0) {
> +			printf("QSPI: Error, command is empty.\n");
> +			return -1;
> +		}
> +
> +		if (data_in && data_bytes) {
> +			/* read */
> +			/* Use STIG if no address. */
> +			if (!CQSPI_IS_ADDR(cadence_qspi->cmd_len))
> +				mode = CQSPI_STIG_READ;
> +			else
> +				mode = CQSPI_INDIRECT_READ;
> +		} else if (data_out && !(flags & SPI_XFER_BEGIN)) {
> +			/* write */
> +			if (!CQSPI_IS_ADDR(cadence_qspi->cmd_len))
> +				mode = CQSPI_STIG_WRITE;
> +			else
> +				mode = CQSPI_INDIRECT_WRITE;
> +		}
> +
> +		switch (mode) {
> +		case CQSPI_STIG_READ:
> +			err = cadence_qspi_apb_command_read(
> +				cadence_qspi->cmd_len, cmd_buf,
> +				data_bytes, data_in);
> +
> +		break;
> +		case CQSPI_STIG_WRITE:
> +			err = cadence_qspi_apb_command_write(
> +				cadence_qspi->cmd_len, cmd_buf,
> +				data_bytes, data_out);
> +		break;
> +		case CQSPI_INDIRECT_READ:
> +			err = cadence_qspi_apb_indirect_read_setup(
> +				QSPI_AHB_BASE,
> +				cadence_qspi->cmd_len, cmd_buf);
> +			if (!err) {
> +				err = cadence_qspi_apb_indirect_read_execute
> +				(ahbbase, data_bytes, data_in);
> +			}
> +		break;
> +		case CQSPI_INDIRECT_WRITE:
> +			err = cadence_qspi_apb_indirect_write_setup
> +				(QSPI_AHB_BASE,
> +				cadence_qspi->cmd_len, cmd_buf);
> +			if (!err) {
> +				err = cadence_qspi_apb_indirect_write_execute
> +				(ahbbase, data_bytes, data_out);
> +			}
> +		break;
> +		default:
> +			err = -1;
> +			break;
> +		}
> +
> +		if (flags & SPI_XFER_END) {
> +			/* clear command buffer */
> +			memset(cmd_buf, 0, sizeof(cadence_qspi->cmd_buf));
> +			cadence_qspi->cmd_len = 0;
> +		}
> +	}
> +	return err;
> +}
> +
> +void spi_enter_xip(struct spi_slave *slave, char xip_dummy)
> +{
> +	/* Enter XiP */
> +	cadence_qspi_apb_enter_xip(xip_dummy);
> +	return;
> +}
> +
> +
> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
> new file mode 100644
> index 0000000..346837e
> --- /dev/null
> +++ b/drivers/spi/cadence_qspi.h
> @@ -0,0 +1,196 @@
> +/*
> + * (C) Copyright 2014 Altera Corporation <www.altera.com>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#ifndef __CADENCE_QSPI_H__
> +#define __CADENCE_QSPI_H__
> +
> +/*
> + * Macro required for this driver
> + *
> + * #define CONFIG_CQSPI_BASE		(SOCFPGA_QSPIREGS_ADDRESS)
> + * #define CONFIG_CQSPI_AHB_BASE		(SOCFPGA_QSPIDATA_ADDRESS)
> + * -> To specify base address for controller CSR base and AHB data base addr
> + *
> + * #define CONFIG_CQSPI_REF_CLK		(400000000)
> + * -> The clock frequency supplied from PLL to the QSPI controller
> + *
> + * #define CONFIG_CQSPI_PAGE_SIZE		(256)
> + * -> To define the page size of serial flash in bytes
> + *
> + * #define CONFIG_CQSPI_BLOCK_SIZE		(16)
> + * -> To define the block size of serial flash in pages
> + *
> + * #define CONFIG_CQSPI_DECODER		(0)
> + * -> To enable the 4-to-16 decoder which enable up to 16 serial flash devices
> + *
> + * #define CONFIG_CQSPI_TSHSL_NS		(200)
> + * #define CONFIG_CQSPI_TSD2D_NS		(255)
> + * #define CONFIG_CQSPI_TCHSH_NS		(20)
> + * #define CONFIG_CQSPI_TSLCH_NS		(20)
> + * -> Configure controller based on serial flash device timing characteristic
> + */
> +
> +#define QSPI_BASE				(CONFIG_CQSPI_BASE)
> +#define QSPI_AHB_BASE				(CONFIG_CQSPI_AHB_BASE)
> +#define CQSPI_IS_ADDR(cmd_len)			(cmd_len > 1 ? 1 : 0)
> +
> +struct cadence_qspi_slave {
> +	struct spi_slave slave;
> +	unsigned int	mode;
> +	unsigned int	max_hz;
> +	void		*regbase;
> +	void		*ahbbase;
> +	size_t		cmd_len;
> +	u8		cmd_buf[32];
> +	size_t		data_len;
> +};
> +
> +struct cadence_qspi {
> +	u32	cfg;
> +	u32	devrd;
> +	u32	devwr;
> +	u32	delay;
> +	u32	rddatacap;
> +	u32	devsz;
> +	u32	srampart;
> +	u32	indaddrtrig;
> +	u32	dmaper;
> +	u32	remapaddr;
> +	u32	modebit;
> +	u32	sramfill;
> +	u32	txthresh;
> +	u32	rxthresh;
> +	u32	_pad_0x38_0x3f[2];
> +	u32	irqstat;
> +	u32	irqmask;
> +	u32	_pad_0x48_0x4f[2];
> +	u32	lowwrprot;
> +	u32	uppwrprot;
> +	u32	wrprot;
> +	u32	_pad_0x5c_0x5f;
> +	u32	indrd;
> +	u32	indrdwater;
> +	u32	indrdstaddr;
> +	u32	indrdcnt;
> +	u32	indwr;
> +	u32	indwrwater;
> +	u32	indwrstaddr;
> +	u32	indwrcnt;
> +	u32	_pad_0x80_0x8f[4];
> +	u32	flashcmd;
> +	u32	flashcmdaddr;
> +	u32	_pad_0x98_0x9f[2];
> +	u32	flashcmdrddatalo;
> +	u32	flashcmdrddataup;
> +	u32	flashcmdwrdatalo;
> +	u32	flashcmdwrdataup;
> +	u32	_pad_0xb0_0xfb[19];
> +	u32	moduleid;
> +};
> +
> +/* Controller's configuration and status register */
> +#define	CQSPI_REG_CONFIG_CLK_POL_LSB		1
> +#define	CQSPI_REG_CONFIG_CLK_PHA_LSB		2
> +#define	CQSPI_REG_CONFIG_ENABLE_MASK		(1 << 0)
> +#define	CQSPI_REG_CONFIG_DIRECT_MASK		(1 << 7)
> +#define	CQSPI_REG_CONFIG_DECODE_MASK		(1 << 9)
> +#define	CQSPI_REG_CONFIG_XIP_IMM_MASK		(1 << 18)
> +#define	CQSPI_REG_CONFIG_CHIPSELECT_LSB		10
> +#define	CQSPI_REG_CONFIG_BAUD_LSB		19
> +#define	CQSPI_REG_CONFIG_IDLE_LSB		31
> +#define	CQSPI_REG_CONFIG_CHIPSELECT_MASK	0xF
> +#define	CQSPI_REG_CONFIG_BAUD_MASK		0xF
> +#define	CQSPI_REG_RD_INSTR_OPCODE_LSB		0
> +#define	CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB	8
> +#define	CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB	12
> +#define	CQSPI_REG_RD_INSTR_TYPE_DATA_LSB	16
> +#define	CQSPI_REG_RD_INSTR_MODE_EN_LSB		20
> +#define	CQSPI_REG_RD_INSTR_DUMMY_LSB		24
> +#define	CQSPI_REG_RD_INSTR_TYPE_INSTR_MASK	0x3
> +#define	CQSPI_REG_RD_INSTR_TYPE_ADDR_MASK	0x3
> +#define	CQSPI_REG_RD_INSTR_TYPE_DATA_MASK	0x3
> +#define	CQSPI_REG_RD_INSTR_DUMMY_MASK		0x1F
> +#define	CQSPI_REG_WR_INSTR_OPCODE_LSB		0
> +#define	CQSPI_REG_DELAY_TSLCH_LSB		0
> +#define	CQSPI_REG_DELAY_TCHSH_LSB		8
> +#define	CQSPI_REG_DELAY_TSD2D_LSB		16
> +#define	CQSPI_REG_DELAY_TSHSL_LSB		24
> +#define	CQSPI_REG_DELAY_TSLCH_MASK		0xFF
> +#define	CQSPI_REG_DELAY_TCHSH_MASK		0xFF
> +#define	CQSPI_REG_DELAY_TSD2D_MASK		0xFF
> +#define	CQSPI_REG_DELAY_TSHSL_MASK		0xFF
> +#define	CQSPI_REG_RD_DATA_CAPTURE_BYPASS_LSB	0
> +#define	CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB	1
> +#define	CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK	0xF
> +#define	CQSPI_REG_SIZE_ADDRESS_LSB		0
> +#define	CQSPI_REG_SIZE_PAGE_LSB			4
> +#define	CQSPI_REG_SIZE_BLOCK_LSB		16
> +#define	CQSPI_REG_SIZE_ADDRESS_MASK		0xF
> +#define	CQSPI_REG_SIZE_PAGE_MASK		0xFFF
> +#define	CQSPI_REG_SIZE_BLOCK_MASK		0x3F
> +#define	CQSPI_REG_SRAMLEVEL_RD_LSB		0
> +#define	CQSPI_REG_SRAMLEVEL_WR_LSB		16
> +#define	CQSPI_REG_SRAMLEVEL_RD_MASK		0xFFFF
> +#define	CQSPI_REG_SRAMLEVEL_WR_MASK		0xFFFF
> +#define	CQSPI_REG_INDIRECTRD_START_MASK		(1 << 0)
> +#define	CQSPI_REG_INDIRECTRD_CANCEL_MASK	(1 << 1)
> +#define	CQSPI_REG_INDIRECTRD_INPROGRESS_MASK	(1 << 2)
> +#define	CQSPI_REG_INDIRECTRD_DONE_MASK		(1 << 5)
> +#define	CQSPI_REG_CMDCTRL_EXECUTE_MASK		(1 << 0)
> +#define	CQSPI_REG_CMDCTRL_INPROGRESS_MASK	(1 << 1)
> +#define	CQSPI_REG_CMDCTRL_DUMMY_LSB		7
> +#define	CQSPI_REG_CMDCTRL_WR_BYTES_LSB		12
> +#define	CQSPI_REG_CMDCTRL_WR_EN_LSB		15
> +#define	CQSPI_REG_CMDCTRL_ADD_BYTES_LSB		16
> +#define	CQSPI_REG_CMDCTRL_ADDR_EN_LSB		19
> +#define	CQSPI_REG_CMDCTRL_RD_BYTES_LSB		20
> +#define	CQSPI_REG_CMDCTRL_RD_EN_LSB		23
> +#define	CQSPI_REG_CMDCTRL_OPCODE_LSB		24
> +#define	CQSPI_REG_CMDCTRL_DUMMY_MASK		0x1F
> +#define	CQSPI_REG_CMDCTRL_WR_BYTES_MASK		0x7
> +#define	CQSPI_REG_CMDCTRL_ADD_BYTES_MASK	0x3
> +#define	CQSPI_REG_CMDCTRL_RD_BYTES_MASK		0x7
> +#define	CQSPI_REG_CMDCTRL_OPCODE_MASK		0xFF
> +#define	CQSPI_REG_INDIRECTWR_START_MASK		(1 << 0)
> +#define	CQSPI_REG_INDIRECTWR_CANCEL_MASK	(1 << 1)
> +#define	CQSPI_REG_INDIRECTWR_INPROGRESS_MASK	(1 << 2)
> +#define	CQSPI_REG_INDIRECTWR_DONE_MASK		(1 << 5)
> +
> +/* Transfer type */
> +#define CQSPI_STIG_READ				0
> +#define CQSPI_STIG_WRITE			1
> +#define CQSPI_INDIRECT_READ			2
> +#define CQSPI_INDIRECT_WRITE			3
> +
> +/* Transfer mode */
> +#define CQSPI_INST_TYPE_SINGLE			(0)
> +#define CQSPI_INST_TYPE_DUAL			(1)
> +#define CQSPI_INST_TYPE_QUAD			(2)
> +
> +/* controller operation setting */
> +#define CQSPI_NO_DECODER_MAX_CS			(4)
> +#define CQSPI_DECODER_MAX_CS			(16)
> +#define CQSPI_READ_CAPTURE_MAX_DELAY		(16)
> +#define CQSPI_REG_POLL_US			(1)
> +#define CQSPI_REG_RETRY				(10000)
> +#define CQSPI_POLL_IDLE_RETRY			(3)
> +#define CQSPI_FIFO_WIDTH			(4)
> +#define CQSPI_STIG_DATA_LEN_MAX			(8)
> +#define CQSPI_INDIRECTTRIGGER_ADDR_MASK		(0xFFFFF)
> +#define CQSPI_DUMMY_CLKS_PER_BYTE		(8)
> +#define CQSPI_DUMMY_BYTES_MAX			(4)
> +
> +/* Controller sram size in word */
> +#define CQSPI_REG_SRAM_SIZE_WORD		(128)
> +#define CQSPI_REG_SRAM_RESV_WORDS		(2)
> +#define CQSPI_REG_SRAM_PARTITION_WR		(1)
> +#define CQSPI_REG_SRAM_PARTITION_RD		\
> +	(CQSPI_REG_SRAM_SIZE_WORD - CQSPI_REG_SRAM_RESV_WORDS)
> +#define CQSPI_REG_SRAM_THRESHOLD_WORDS		(50)
> +#define CQSPI_REG_SRAM_FILL_THRESHOLD	\
> +	((CQSPI_REG_SRAM_SIZE_WORD / 2) * CQSPI_FIFO_WIDTH)
> +
> +#endif /* __CADENCE_QSPI_H__ */
Gerhard Sittig - Feb. 13, 2014, 10:23 p.m.
Yes, I'm late to respond. :(

On Fri, Jan 10, 2014 at 11:39 -0600, Chin Liang See wrote:
> 
> To add the Cadence SPI driver support for Altera SOCFPGA. It
> required information such as clocks and timing from platform's
> configuration header file within include/configs folder

s/To add/Add/?
s/It required/It requires/?

> ---
>  drivers/spi/Makefile       |    1 +
>  drivers/spi/cadence_qspi.c | 1018 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/spi/cadence_qspi.h |  196 +++++++++
>  3 files changed, 1215 insertions(+)
>  create mode 100644 drivers/spi/cadence_qspi.c
>  create mode 100644 drivers/spi/cadence_qspi.h
> 
> diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
> index ed4ecd7..b8d56ea 100644
> --- a/drivers/spi/Makefile
> +++ b/drivers/spi/Makefile
> @@ -15,6 +15,7 @@ obj-$(CONFIG_ATMEL_DATAFLASH_SPI) += atmel_dataflash_spi.o
>  obj-$(CONFIG_ATMEL_SPI) += atmel_spi.o
>  obj-$(CONFIG_BFIN_SPI) += bfin_spi.o
>  obj-$(CONFIG_BFIN_SPI6XX) += bfin_spi6xx.o
> +obj-$(CONFIG_CADENCE_QSPI) += cadence_qspi.o
>  obj-$(CONFIG_CF_SPI) += cf_spi.o
>  obj-$(CONFIG_CF_QSPI) += cf_qspi.o
>  obj-$(CONFIG_DAVINCI_SPI) += davinci_spi.o

Is this a driver without a user, aka dead code?  There is no
CADENCE_QSPI in U-Boot master, neither does your patch introduce
it anywhere.  Am I missing a branch you build on top of?

> +static void cadence_qspi_apb_read_fifo_data(void *dest,
> +	const void *src_ahb_addr, unsigned int bytes)
> +{
> +	unsigned int temp;
> +	int remaining = bytes;
> +	unsigned int *dest_ptr = (unsigned int *)dest;
> +	unsigned int *src_ptr = (unsigned int *)src_ahb_addr;
> +
> +	while (remaining > 0) {
> +		if (remaining >= CQSPI_FIFO_WIDTH) {
> +			*dest_ptr = readl(src_ptr);
> +			remaining -= CQSPI_FIFO_WIDTH;
> +		} else {
> +			/* dangling bytes */
> +			temp = readl(src_ptr);
> +			memcpy(dest_ptr, &temp, remaining);
> +			break;
> +		}
> +		dest_ptr++;
> +	}
> +
> +	return;
> +}

These pointer casts look suspicious.  The code assumes that the
"void *" pointers are aligned like integer items would be.  This
may work for the port address, but I'd rather not do this for the
memory addresses.

> +/* Return 1 if idle, otherwise return 0 (busy). */
> +static unsigned int cadence_qspi_wait_idle(void)
> +{
> +	unsigned int start, count = 0;
> +	/* timeout in unit of ms */
> +	unsigned int timeout = 5000;
> +
> +	start = get_timer(0);
> +	for ( ; get_timer(start) < timeout ; ) {
> +		if ((readl(&cadence_qspi_base->cfg) >>
> +			CQSPI_REG_CONFIG_IDLE_LSB) & 0x1)
> +			count++;
> +		else
> +			count = 0;
> +		/*
> +		 * Ensure the QSPI controller is in true idle state after
> +		 * reading back the same idle status consecutively
> +		 */
> +		if (count >= CQSPI_POLL_IDLE_RETRY)
> +			return 1;
> +	}
> +
> +	/* Timeout, still in busy mode. */
> +	printf("QSPI: QSPI is still busy after poll for %d times.\n",
> +		CQSPI_REG_RETRY);
> +	return 0;
> +}

There are several style issues here.

A for() loop without a setup clause and without a re-iteration
clause would actually be a while() in disguise. At the very least
I'd suggest /* EMPTY */ comments to show that nothing was omitted
by chance, but by will.

There are whitespace issues in the indentation of continued
lines.  The if() looks like it had multi line branches without
braces, and makes readers stop and think what's wrong.

The idle flag test looks a little obfuscated.  I guess the more
popular idiom is to test for "readl(&cfg) & (1 << flagpos)", or
use the BIT() macro.

I could not quite get how the counter and the timer interact
here.  Is my interpretation correct that there is a timeout, and
you succeed if you can see three consecutive set flags within
that time span?

> +static void cadence_qspi_apb_readdata_capture(unsigned int bypass,
> +	unsigned int delay)
> +{
> +	unsigned int reg;
> +	cadence_qspi_apb_controller_disable();
> +
> +	reg = readl(&cadence_qspi_base->rddatacap);
> +
> +	if (bypass)
> +		reg |= (1 << CQSPI_REG_RD_DATA_CAPTURE_BYPASS_LSB);
> +	else
> +		reg &= ~(1 << CQSPI_REG_RD_DATA_CAPTURE_BYPASS_LSB);
> +
> +	reg &= ~(CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK
> +		<< CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB);
> +
> +	reg |= ((delay & CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK)
> +		<< CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB);
> +
> +	writel(reg, &cadence_qspi_base->rddatacap);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}

This could be a candidate for clrsetbits(), but I guess the "if
(bypass)" would complicate this operation.  So the above form may
be the most appropriate one.

> +static void cadence_qspi_apb_chipselect(unsigned int chip_select,
> +	unsigned int decoder_enable)
> +{
> +	unsigned int reg;
> +
> +	cadence_qspi_apb_controller_disable();
> +
> +	debug("%s : chipselect %d decode %d\n", __func__, chip_select,
> +		decoder_enable);
> +
> +	reg = readl(&cadence_qspi_base->cfg);
> +	/* docoder */
> +	if (decoder_enable)
> +		reg |= CQSPI_REG_CONFIG_DECODE_MASK;
> +	else {
> +		reg &= ~CQSPI_REG_CONFIG_DECODE_MASK;
> +		/* Convert CS if without decoder.
> +		 * CS0 to 4b'1110
> +		 * CS1 to 4b'1101
> +		 * CS2 to 4b'1011
> +		 * CS3 to 4b'0111
> +		 */
> +		chip_select = 0xF & ~(1 << chip_select);
> +	}

style nits: braces around if() arms, multi-line comments, and
indentation of continuation lines

> +
> +	reg &= ~(CQSPI_REG_CONFIG_CHIPSELECT_MASK
> +			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB);
> +	reg |= (chip_select & CQSPI_REG_CONFIG_CHIPSELECT_MASK)
> +			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB;
> +	writel(reg, &cadence_qspi_base->cfg);
> +
> +	cadence_qspi_apb_controller_enable();
> +	return;
> +}

> +/* calibration sequence to determine the read data capture delay register */
> +int spi_calibration(struct spi_slave *slave)
> +{
> +	struct cadence_qspi_slave *cadence_qspi = to_cadence_qspi_slave(slave);
> +	u8 opcode_rdid = 0x9F;

Is there a symbolic name for this magic value, for those of us
who don't know SPI NOR command opcodes by heart? :)

> +int spi_cs_is_valid(unsigned int bus, unsigned int cs)
> +{
> +#if (CONFIG_CQSPI_DECODER == 1)
> +	if (((cs >= 0) && (cs < CQSPI_DECODER_MAX_CS)) && ((bus >= 0) &&
> +		(bus < CQSPI_DECODER_MAX_CS))) {
> +		return 1;
> +	}
> +#else
> +	if (((cs >= 0) && (cs < CQSPI_NO_DECODER_MAX_CS)) &&
> +		((bus >= 0) && (bus < CQSPI_NO_DECODER_MAX_CS))) {
> +		return 1;
> +	}
> +#endif
> +	printf("QSPI: Invalid bus or cs. Bus %d cs %d\n", bus, cs);
> +	return 0;
> +}

It took me a while to determine how those two cases are similar
and where exactly the difference is ...

> +int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *data_out,
> +		void *data_in, unsigned long flags)
> +{
> +[ ... ]
> +		switch (mode) {
> +		case CQSPI_STIG_READ:
> +			err = cadence_qspi_apb_command_read(
> +				cadence_qspi->cmd_len, cmd_buf,
> +				data_bytes, data_in);
> +
> +		break;
> +		case CQSPI_STIG_WRITE:
> +			err = cadence_qspi_apb_command_write(
> +				cadence_qspi->cmd_len, cmd_buf,
> +				data_bytes, data_out);
> +		break;
> +		case CQSPI_INDIRECT_READ:
> +			err = cadence_qspi_apb_indirect_read_setup(
> +				QSPI_AHB_BASE,
> +				cadence_qspi->cmd_len, cmd_buf);
> +			if (!err) {
> +				err = cadence_qspi_apb_indirect_read_execute
> +				(ahbbase, data_bytes, data_in);
> +			}
> +		break;
> +		case CQSPI_INDIRECT_WRITE:
> +			err = cadence_qspi_apb_indirect_write_setup
> +				(QSPI_AHB_BASE,
> +				cadence_qspi->cmd_len, cmd_buf);
> +			if (!err) {
> +				err = cadence_qspi_apb_indirect_write_execute
> +				(ahbbase, data_bytes, data_out);
> +			}
> +		break;
> +		default:
> +			err = -1;
> +			break;
> +		}

funny indentation here, for most of the 'break' instructions, and
for continuation lines (and braces around single statements?)

> --- /dev/null
> +++ b/drivers/spi/cadence_qspi.h
> [ ... ]
> +
> +/*
> + * Macro required for this driver
> + *
> + * #define CONFIG_CQSPI_BASE		(SOCFPGA_QSPIREGS_ADDRESS)
> + * #define CONFIG_CQSPI_AHB_BASE		(SOCFPGA_QSPIDATA_ADDRESS)
> + * -> To specify base address for controller CSR base and AHB data base addr
> + *
> + * #define CONFIG_CQSPI_REF_CLK		(400000000)
> + * -> The clock frequency supplied from PLL to the QSPI controller
> + *
> + * #define CONFIG_CQSPI_PAGE_SIZE		(256)
> + * -> To define the page size of serial flash in bytes
> + *
> + * #define CONFIG_CQSPI_BLOCK_SIZE		(16)
> + * -> To define the block size of serial flash in pages
> + *
> + * #define CONFIG_CQSPI_DECODER		(0)
> + * -> To enable the 4-to-16 decoder which enable up to 16 serial flash devices
> + *
> + * #define CONFIG_CQSPI_TSHSL_NS		(200)
> + * #define CONFIG_CQSPI_TSD2D_NS		(255)
> + * #define CONFIG_CQSPI_TCHSH_NS		(20)
> + * #define CONFIG_CQSPI_TSLCH_NS		(20)
> + * -> Configure controller based on serial flash device timing characteristic
> + */

parens around single numbers are useless, parens in C
preprocessor macros usually are only appropriate around complex
(multi word) macro RHS, and parameters which may resolve into
multiple words

> +/* Controller's configuration and status register */
> +#define	CQSPI_REG_CONFIG_CLK_POL_LSB		1
> +#define	CQSPI_REG_CONFIG_CLK_PHA_LSB		2
> +#define	CQSPI_REG_CONFIG_ENABLE_MASK		(1 << 0)
> +#define	CQSPI_REG_CONFIG_DIRECT_MASK		(1 << 7)
> +#define	CQSPI_REG_CONFIG_DECODE_MASK		(1 << 9)
> +#define	CQSPI_REG_CONFIG_XIP_IMM_MASK		(1 << 18)
> +#define	CQSPI_REG_CONFIG_CHIPSELECT_LSB		10
> +#define	CQSPI_REG_CONFIG_BAUD_LSB		19
> +#define	CQSPI_REG_CONFIG_IDLE_LSB		31

Here you seem to mix styles of declaring flag bit positions, and
flag mask values.  Can you check those (and the other decls that
I did not cite for brevity), and potentially get a single style?
See if the BIT() macro can further improve readability.


virtually yours
Gerhard Sittig

Patch

diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index ed4ecd7..b8d56ea 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -15,6 +15,7 @@  obj-$(CONFIG_ATMEL_DATAFLASH_SPI) += atmel_dataflash_spi.o
 obj-$(CONFIG_ATMEL_SPI) += atmel_spi.o
 obj-$(CONFIG_BFIN_SPI) += bfin_spi.o
 obj-$(CONFIG_BFIN_SPI6XX) += bfin_spi6xx.o
+obj-$(CONFIG_CADENCE_QSPI) += cadence_qspi.o
 obj-$(CONFIG_CF_SPI) += cf_spi.o
 obj-$(CONFIG_CF_QSPI) += cf_qspi.o
 obj-$(CONFIG_DAVINCI_SPI) += davinci_spi.o
diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
new file mode 100644
index 0000000..4712b45
--- /dev/null
+++ b/drivers/spi/cadence_qspi.c
@@ -0,0 +1,1018 @@ 
+/*
+ * (C) Copyright 2014 Altera Corporation <www.altera.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/errno.h>
+#include <malloc.h>
+#include <spi.h>
+#include "cadence_qspi.h"
+
+/*
+ * Driver-wide state.
+ * NOTE(review): the names suggest an init-done flag plus the frequency and
+ * chip select last calibrated; their users lie outside this chunk - confirm.
+ */
+static int qspi_is_init;
+static unsigned int qspi_calibrated_hz;
+static unsigned int qspi_calibrated_cs;
+
+/* Controller register block, located at QSPI_BASE. */
+static const struct cadence_qspi *cadence_qspi_base = (void *)QSPI_BASE;
+
+/* Map an embedded struct spi_slave back to its cadence_qspi_slave wrapper. */
+#define to_cadence_qspi_slave(s)		\
+		container_of(s, struct cadence_qspi_slave, slave)
+
+/*
+ * (tdelay_ns - tsclk_ns) / tref_ns. With unsigned arguments the subtraction
+ * wraps around when tdelay_ns is smaller than tsclk_ns.
+ */
+#define CQSPI_CAL_DELAY(tdelay_ns, tref_ns, tsclk_ns)	\
+	((((tdelay_ns) - (tsclk_ns)) / (tref_ns)))
+
+/* Extract the write-level field from the sramfill register. */
+#define CQSPI_GET_WR_SRAM_LEVEL()		\
+	((readl(&cadence_qspi_base->sramfill) >>	\
+	CQSPI_REG_SRAMLEVEL_WR_LSB) & CQSPI_REG_SRAMLEVEL_WR_MASK)
+
+/*
+ * Assemble a flash address from bytes stored most significant first.
+ *
+ * addr_buf:   address bytes; three are read, or four when addr_width is 4
+ * addr_width: 4 selects a four-byte address, any other value a three-byte one
+ *
+ * Returns the assembled address.
+ */
+static unsigned int cadence_qspi_apb_cmd2addr(const unsigned char *addr_buf,
+	unsigned int addr_width)
+{
+	unsigned int nbytes = (addr_width == 4) ? 4 : 3;
+	unsigned int value = 0;
+	unsigned int i;
+
+	for (i = 0; i < nbytes; i++)
+		value = (value << 8) | addr_buf[i];
+
+	return value;
+}
+
+/*
+ * Copy 'bytes' bytes from the FIFO window into memory.
+ *
+ * dest:         destination buffer; no alignment is required
+ * src_ahb_addr: FIFO address; every word is read from this same address
+ * bytes:        number of bytes to store at dest
+ *
+ * Each FIFO word is fetched with readl() and copied out with memcpy(), so
+ * the destination is never accessed through a cast word pointer. A final
+ * partial word still costs one full FIFO read; only the leading bytes of
+ * that word are kept.
+ */
+static void cadence_qspi_apb_read_fifo_data(void *dest,
+	const void *src_ahb_addr, unsigned int bytes)
+{
+	unsigned int temp;
+	unsigned int remaining = bytes;
+	unsigned char *dest_ptr = dest;
+
+	while (remaining > 0) {
+		unsigned int chunk = (remaining >= CQSPI_FIFO_WIDTH) ?
+					CQSPI_FIFO_WIDTH : remaining;
+
+		temp = readl(src_ahb_addr);
+		memcpy(dest_ptr, &temp, chunk);
+		dest_ptr += chunk;
+		remaining -= chunk;
+	}
+}
+
+/*
+ * Push 'bytes' bytes from memory into the FIFO window.
+ *
+ * dest_ahb_addr: FIFO address; every word is written to this same address
+ * src:           source buffer; no alignment is required
+ * bytes:         number of bytes to take from src
+ *
+ * Words are staged through a local variable with memcpy() so the source is
+ * never read through a cast word pointer. The unused bytes of a final
+ * partial word are set to 0xFF instead of being left uninitialised.
+ * NOTE(review): presumably the controller ignores bytes beyond the
+ * programmed transfer count - confirm against the controller manual.
+ */
+static void cadence_qspi_apb_write_fifo_data(const void *dest_ahb_addr,
+	const void *src, unsigned int bytes)
+{
+	unsigned int remaining = bytes;
+	unsigned int *dest_ptr = (unsigned int *)dest_ahb_addr;
+	const unsigned char *src_ptr = src;
+
+	while (remaining > 0) {
+		unsigned int chunk = (remaining >= CQSPI_FIFO_WIDTH) ?
+					CQSPI_FIFO_WIDTH : remaining;
+		unsigned int temp = 0xFFFFFFFF;
+
+		memcpy(&temp, src_ptr, chunk);
+		writel(temp, dest_ptr);
+		src_ptr += chunk;
+		remaining -= chunk;
+	}
+}
+
+/*
+ * Read from SRAM FIFO with polling SRAM fill level.
+ *
+ * dest_addr: destination buffer
+ * src_addr:  FIFO address handed to cadence_qspi_apb_read_fifo_data()
+ * num_bytes: total number of bytes to read
+ *
+ * Returns 0 on success, or -1 when the read fill level stays at zero for
+ * CQSPI_REG_RETRY consecutive polls.
+ */
+static int qspi_read_sram_fifo_poll(void *dest_addr,
+			const void *src_addr,  unsigned int num_bytes)
+{
+	unsigned int remaining = num_bytes;
+	unsigned int retry;
+	unsigned int sram_level = 0;
+	unsigned char *dest = (unsigned char *)dest_addr;
+
+	while (remaining > 0) {
+		for (retry = CQSPI_REG_RETRY; retry > 0; retry--) {
+			sram_level = (readl(&cadence_qspi_base->sramfill) >>
+				CQSPI_REG_SRAMLEVEL_RD_LSB) &
+				CQSPI_REG_SRAMLEVEL_RD_MASK;
+			if (sram_level)
+				break;
+			udelay(1);
+		}
+
+		/*
+		 * Decide on the level itself rather than on the retry
+		 * counter: the counter cannot distinguish "succeeded on the
+		 * last poll" from "ran out of polls".
+		 */
+		if (!sram_level) {
+			printf("QSPI: No receive data after polling for %d "
+				"times\n", CQSPI_REG_RETRY);
+			return -1;
+		}
+
+		/* The level is counted in FIFO words; convert to bytes. */
+		sram_level *= CQSPI_FIFO_WIDTH;
+		sram_level = sram_level > remaining ? remaining : sram_level;
+
+		/* Read data from FIFO. */
+		cadence_qspi_apb_read_fifo_data(dest, src_addr, sram_level);
+		dest += sram_level;
+		remaining -= sram_level;
+		udelay(1);
+	}
+	return 0;
+}
+
+
+/*
+ * Write to SRAM FIFO with polling SRAM fill level.
+ *
+ * dest_addr: FIFO address handed to cadence_qspi_apb_write_fifo_data()
+ * src_addr:  source buffer
+ * num_bytes: total number of bytes to write
+ *
+ * Before each chunk of at most CONFIG_CQSPI_PAGE_SIZE bytes, the write fill
+ * level is polled until it is at or below CQSPI_REG_SRAM_THRESHOLD_WORDS.
+ *
+ * Returns 0 on success, or -1 when the level is still above the threshold
+ * after CQSPI_REG_RETRY polls.
+ */
+static int qpsi_write_sram_fifo_push(void *dest_addr,
+				const void *src_addr, unsigned int num_bytes)
+{
+	unsigned int retry;
+	unsigned int sram_level = 0;
+	unsigned int wr_bytes;
+	const unsigned char *src = src_addr;
+	unsigned int remaining = num_bytes;
+	unsigned int page_size = CONFIG_CQSPI_PAGE_SIZE;
+	unsigned int sram_threshold_words = CQSPI_REG_SRAM_THRESHOLD_WORDS;
+
+	while (remaining > 0) {
+		for (retry = CQSPI_REG_RETRY; retry > 0; retry--) {
+			sram_level = CQSPI_GET_WR_SRAM_LEVEL();
+			if (sram_level <= sram_threshold_words)
+				break;
+		}
+		/*
+		 * Test the level, not the retry counter: the counter cannot
+		 * tell a success on the last poll from an exhausted loop.
+		 */
+		if (sram_level > sram_threshold_words) {
+			printf("QSPI: SRAM fill level (0x%08x) "
+				"not hit lower expected level (0x%08x)\n",
+				sram_level, sram_threshold_words);
+			return -1;
+		}
+		/* Write a page or remaining bytes. */
+		wr_bytes = (remaining > page_size) ? page_size : remaining;
+
+		cadence_qspi_apb_write_fifo_data(dest_addr, src, wr_bytes);
+		src += wr_bytes;
+		remaining -= wr_bytes;
+	}
+
+	return 0;
+}
+
+/* Set the enable bit (CQSPI_REG_CONFIG_ENABLE_MASK) in the cfg register. */
+static void cadence_qspi_apb_controller_enable(void)
+{
+	setbits_le32(&cadence_qspi_base->cfg, CQSPI_REG_CONFIG_ENABLE_MASK);
+}
+
+/* Clear the enable bit (CQSPI_REG_CONFIG_ENABLE_MASK) in the cfg register. */
+static void cadence_qspi_apb_controller_disable(void)
+{
+	clrbits_le32(&cadence_qspi_base->cfg, CQSPI_REG_CONFIG_ENABLE_MASK);
+}
+
+/*
+ * Wait for the controller to report idle.
+ *
+ * The idle flag in the cfg register must read as set
+ * CQSPI_POLL_IDLE_RETRY times in a row; a single busy reading restarts the
+ * count. Polling stops once the timeout below has elapsed.
+ *
+ * Return 1 if idle, otherwise return 0 (busy).
+ */
+static unsigned int cadence_qspi_wait_idle(void)
+{
+	unsigned int start, count = 0;
+	/* timeout in unit of ms */
+	unsigned int timeout = 5000;
+
+	start = get_timer(0);
+	while (get_timer(start) < timeout) {
+		if ((readl(&cadence_qspi_base->cfg) >>
+		     CQSPI_REG_CONFIG_IDLE_LSB) & 0x1)
+			count++;
+		else
+			count = 0;
+		/*
+		 * Ensure the QSPI controller is in true idle state after
+		 * reading back the same idle status consecutively
+		 */
+		if (count >= CQSPI_POLL_IDLE_RETRY)
+			return 1;
+	}
+
+	/* Timeout, still in busy mode: report the time actually waited. */
+	printf("QSPI: QSPI is still busy after %u ms.\n", timeout);
+	return 0;
+}
+
+/*
+ * Program the read data capture register.
+ *
+ * bypass: non-zero sets the bypass bit, zero clears it
+ * delay:  value for the capture delay field (masked to the field width)
+ *
+ * The controller is disabled while the register is rewritten and enabled
+ * again afterwards.
+ */
+static void cadence_qspi_apb_readdata_capture(unsigned int bypass,
+	unsigned int delay)
+{
+	const unsigned int bypass_bit =
+		1 << CQSPI_REG_RD_DATA_CAPTURE_BYPASS_LSB;
+	const unsigned int delay_field =
+		CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK
+		<< CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB;
+	unsigned int val;
+
+	cadence_qspi_apb_controller_disable();
+
+	val = readl(&cadence_qspi_base->rddatacap);
+
+	/* Start from both fields cleared, then fill in what was asked for. */
+	val &= ~(bypass_bit | delay_field);
+	if (bypass)
+		val |= bypass_bit;
+	val |= (delay & CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK)
+		<< CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB;
+
+	writel(val, &cadence_qspi_base->rddatacap);
+
+	cadence_qspi_apb_controller_enable();
+}
+
+/*
+ * Program the baud rate divisor field of the cfg register.
+ *
+ * ref_clk_hz: reference clock feeding the controller, in Hz
+ * sclk_hz:    requested serial clock, in Hz
+ *
+ * A field value n is treated as "divide by 2 * (n + 1)", as the original
+ * even/odd handling implies. The divider is rounded up so the resulting
+ * clock does not exceed sclk_hz, unless even the largest field value is
+ * too fast, in which case that value is used. A request above the
+ * reference clock selects the smallest divider instead of wrapping to the
+ * largest, and sclk_hz == 0 selects the largest divider instead of
+ * dividing by zero.
+ *
+ * The controller is disabled while the register is rewritten and enabled
+ * again afterwards.
+ */
+static void cadence_qspi_apb_config_baudrate_div(unsigned int ref_clk_hz,
+	unsigned int sclk_hz)
+{
+	unsigned int reg;
+	unsigned int div;
+
+	cadence_qspi_apb_controller_disable();
+	reg = readl(&cadence_qspi_base->cfg);
+	reg &= ~(CQSPI_REG_CONFIG_BAUD_MASK << CQSPI_REG_CONFIG_BAUD_LSB);
+
+	if (sclk_hz == 0) {
+		/* No usable request: fall back to the slowest clock. */
+		div = CQSPI_REG_CONFIG_BAUD_MASK;
+	} else {
+		unsigned int step = 2 * sclk_hz;
+
+		/* div = ceil(ref_clk_hz / (2 * sclk_hz)) - 1, floored at 0 */
+		div = ref_clk_hz / step;
+		if (ref_clk_hz % step)
+			div++;
+		if (div)
+			div--;
+
+		if (div > CQSPI_REG_CONFIG_BAUD_MASK)
+			div = CQSPI_REG_CONFIG_BAUD_MASK;
+	}
+
+	debug("%s: ref_clk %dHz sclk %dHz Div 0x%x\n", __func__,
+		ref_clk_hz, sclk_hz, div);
+
+	div = (div & CQSPI_REG_CONFIG_BAUD_MASK) << CQSPI_REG_CONFIG_BAUD_LSB;
+	reg |= div;
+	writel(reg, &cadence_qspi_base->cfg);
+
+	cadence_qspi_apb_controller_enable();
+}
+
+/*
+ * Set the clock polarity and phase bits of the cfg register.
+ *
+ * clk_pol: bit 0 is written to the polarity bit
+ * clk_pha: bit 0 is written to the phase bit
+ *
+ * Both bits are cleared before the new values are OR-ed in, so a
+ * previously set bit cannot survive a request for 0. The controller is
+ * disabled while the register is rewritten and enabled again afterwards.
+ */
+static void cadence_qspi_apb_set_clk_mode(unsigned int clk_pol,
+	unsigned int clk_pha)
+{
+	unsigned int reg;
+
+	cadence_qspi_apb_controller_disable();
+	reg = readl(&cadence_qspi_base->cfg);
+	/* Build one mask per bit; OR-ing the bit positions would be wrong. */
+	reg &= ~((1 << CQSPI_REG_CONFIG_CLK_POL_LSB) |
+		 (1 << CQSPI_REG_CONFIG_CLK_PHA_LSB));
+
+	reg |= ((clk_pol & 0x1) << CQSPI_REG_CONFIG_CLK_POL_LSB);
+	reg |= ((clk_pha & 0x1) << CQSPI_REG_CONFIG_CLK_PHA_LSB);
+
+	writel(reg, &cadence_qspi_base->cfg);
+
+	cadence_qspi_apb_controller_enable();
+}
+
+/*
+ * Select a chip and set or clear the decoder bit in the cfg register.
+ *
+ * chip_select:    chip select number
+ * decoder_enable: non-zero sets the decode bit and writes chip_select to
+ *                 the field unchanged; zero clears the decode bit and
+ *                 writes a pattern with only the selected line low
+ *
+ * The controller is disabled while the register is rewritten and enabled
+ * again afterwards.
+ */
+static void cadence_qspi_apb_chipselect(unsigned int chip_select,
+	unsigned int decoder_enable)
+{
+	unsigned int cfg;
+	unsigned int cs_field = chip_select;
+
+	cadence_qspi_apb_controller_disable();
+
+	debug("%s : chipselect %d decode %d\n", __func__, chip_select,
+		decoder_enable);
+
+	cfg = readl(&cadence_qspi_base->cfg);
+	cfg &= ~CQSPI_REG_CONFIG_DECODE_MASK;
+
+	if (decoder_enable) {
+		cfg |= CQSPI_REG_CONFIG_DECODE_MASK;
+	} else {
+		/*
+		 * Without the decoder the field holds one line per chip:
+		 * CS0 -> 4b'1110, CS1 -> 4b'1101,
+		 * CS2 -> 4b'1011, CS3 -> 4b'0111
+		 */
+		cs_field = 0xF & ~(1 << chip_select);
+	}
+
+	cfg &= ~(CQSPI_REG_CONFIG_CHIPSELECT_MASK
+			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB);
+	cfg |= (cs_field & CQSPI_REG_CONFIG_CHIPSELECT_MASK)
+			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB;
+	writel(cfg, &cadence_qspi_base->cfg);
+
+	cadence_qspi_apb_controller_enable();
+}
+
+/*
+ * Convert one timing requirement to delay register ticks.
+ *
+ * A requirement shorter than one serial clock period is treated as zero
+ * remaining time; with unsigned arithmetic the bare subtraction would wrap
+ * around. One tick is always added to round up.
+ */
+static unsigned int cadence_qspi_apb_delay_ticks(unsigned int tdelay_ns,
+	unsigned int tref_ns, unsigned int tsclk_ns)
+{
+	if (tdelay_ns < tsclk_ns)
+		tdelay_ns = tsclk_ns;
+
+	return CQSPI_CAL_DELAY(tdelay_ns, tref_ns, tsclk_ns) + 1;
+}
+
+/*
+ * Program the delay register from the four timing requirements.
+ *
+ * ref_clk:  reference clock in Hz
+ * sclk_hz:  serial clock in Hz
+ * tshsl_ns, tsd2d_ns, tchsh_ns, tslch_ns: timing requirements in ns
+ *
+ * Nothing is programmed when either clock is zero. The controller is
+ * disabled while the register is written and enabled again afterwards.
+ */
+static void cadence_qspi_apb_delay(unsigned int ref_clk, unsigned int sclk_hz,
+	unsigned int tshsl_ns, unsigned int tsd2d_ns,
+	unsigned int tchsh_ns, unsigned int tslch_ns)
+{
+	unsigned int ref_clk_ns;
+	unsigned int sclk_ns;
+	unsigned int tshsl, tchsh, tslch, tsd2d;
+	unsigned int reg;
+
+	if (!ref_clk || !sclk_hz) {
+		printf("QSPI: Invalid clock, ref_clk %u sclk %u\n",
+			ref_clk, sclk_hz);
+		return;
+	}
+
+	/* Convert to ns. */
+	ref_clk_ns = (1000000000) / ref_clk;
+	/* A reference above 1 GHz truncates to 0; avoid dividing by it. */
+	if (!ref_clk_ns)
+		ref_clk_ns = 1;
+
+	/* Convert to ns. */
+	sclk_ns = (1000000000) / sclk_hz;
+
+	cadence_qspi_apb_controller_disable();
+
+	tshsl = cadence_qspi_apb_delay_ticks(tshsl_ns, ref_clk_ns, sclk_ns);
+	tchsh = cadence_qspi_apb_delay_ticks(tchsh_ns, ref_clk_ns, sclk_ns);
+	tslch = cadence_qspi_apb_delay_ticks(tslch_ns, ref_clk_ns, sclk_ns);
+	tsd2d = cadence_qspi_apb_delay_ticks(tsd2d_ns, ref_clk_ns, sclk_ns);
+
+	reg = ((tshsl & CQSPI_REG_DELAY_TSHSL_MASK)
+			<< CQSPI_REG_DELAY_TSHSL_LSB);
+	reg |= ((tchsh & CQSPI_REG_DELAY_TCHSH_MASK)
+			<< CQSPI_REG_DELAY_TCHSH_LSB);
+	reg |= ((tslch & CQSPI_REG_DELAY_TSLCH_MASK)
+			<< CQSPI_REG_DELAY_TSLCH_LSB);
+	reg |= ((tsd2d & CQSPI_REG_DELAY_TSD2D_MASK)
+			<< CQSPI_REG_DELAY_TSD2D_LSB);
+	writel(reg, &cadence_qspi_base->delay);
+
+	cadence_qspi_apb_controller_enable();
+}
+
+/*
+ * One-time controller setup: page and block size fields of the device size
+ * register, no address remap, all interrupts masked.
+ *
+ * The controller is disabled while the registers are written and enabled
+ * again afterwards.
+ */
+static void cadence_qspi_apb_controller_init(void)
+{
+	const unsigned int size_fields =
+		(CQSPI_REG_SIZE_PAGE_MASK << CQSPI_REG_SIZE_PAGE_LSB) |
+		(CQSPI_REG_SIZE_BLOCK_MASK << CQSPI_REG_SIZE_BLOCK_LSB);
+	unsigned int devsz;
+
+	cadence_qspi_apb_controller_disable();
+
+	/* Replace the page and block size fields, keep the other bits. */
+	devsz = readl(&cadence_qspi_base->devsz);
+	devsz &= ~size_fields;
+	devsz |= (CONFIG_CQSPI_PAGE_SIZE << CQSPI_REG_SIZE_PAGE_LSB) |
+		 (CONFIG_CQSPI_BLOCK_SIZE << CQSPI_REG_SIZE_BLOCK_LSB);
+	writel(devsz, &cadence_qspi_base->devsz);
+
+	/* No remap */
+	writel(0, &cadence_qspi_base->remapaddr);
+
+	/* Mask every interrupt */
+	writel(0, &cadence_qspi_base->irqmask);
+
+	cadence_qspi_apb_controller_enable();
+}
+
+/*
+ * Issue a flash command through the flashcmd register and wait for it.
+ *
+ * reg: prepared flashcmd value without the execute bit
+ *
+ * The value is written once without and once with the execute bit, then
+ * the in-progress flag is polled up to CQSPI_REG_RETRY times, 1 us apart.
+ *
+ * Returns 0 on success, or -EIO when the command is still in progress
+ * after the last poll or the controller does not become idle.
+ */
+static int cadence_qspi_apb_exec_flash_cmd(unsigned int reg)
+{
+	unsigned int retry;
+
+	/* Write the CMDCTRL without start execution. */
+	writel(reg, &cadence_qspi_base->flashcmd);
+	/* Start execute */
+	reg |= CQSPI_REG_CMDCTRL_EXECUTE_MASK;
+	writel(reg, &cadence_qspi_base->flashcmd);
+
+	for (retry = CQSPI_REG_RETRY; retry > 0; retry--) {
+		reg = readl(&cadence_qspi_base->flashcmd);
+		if ((reg & CQSPI_REG_CMDCTRL_INPROGRESS_MASK) == 0)
+			break;
+		udelay(1);
+	}
+
+	/*
+	 * Judge by the flag last read, not by the retry counter: the
+	 * counter cannot tell a success on the last poll from a timeout.
+	 */
+	if (reg & CQSPI_REG_CMDCTRL_INPROGRESS_MASK) {
+		printf("QSPI: flash command execution timeout\n");
+		return -EIO;
+	}
+
+	/* Polling QSPI idle status. */
+	if (!cadence_qspi_wait_idle())
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * Execute a command that returns a few bytes. For command RDID, RDSR.
+ *
+ * cmdlen: command length in bytes; must be non-zero
+ * cmdbuf: command bytes; only cmdbuf[0], the opcode, is used
+ * rxlen:  number of bytes to read, 1..CQSPI_STIG_DATA_LEN_MAX
+ * rxbuf:  destination for the bytes read
+ *
+ * Returns 0 on success, -EINVAL on invalid arguments, or the error from
+ * cadence_qspi_apb_exec_flash_cmd().
+ */
+static int cadence_qspi_apb_command_read(unsigned int cmdlen, const u8 *cmdbuf,
+	unsigned int rxlen, u8 *rxbuf)
+{
+	unsigned int reg;
+	unsigned int read_len;
+	int status;
+
+	/* rxlen 0 is rejected: "rxlen - 1" below would wrap around. */
+	if (!cmdlen || cmdbuf == NULL || !rxlen ||
+	    rxlen > CQSPI_STIG_DATA_LEN_MAX || rxbuf == NULL) {
+		printf("QSPI: Invalid input arguments cmdlen %d "
+			"rxlen %d\n", cmdlen, rxlen);
+		return -EINVAL;
+	}
+
+	/* Shift as unsigned: an opcode >= 0x80 would overflow a signed int. */
+	reg = (unsigned int)cmdbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+
+	reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB);
+
+	/* 0 means 1 byte. */
+	reg |= (((rxlen - 1) & CQSPI_REG_CMDCTRL_RD_BYTES_MASK)
+		<< CQSPI_REG_CMDCTRL_RD_BYTES_LSB);
+	status = cadence_qspi_apb_exec_flash_cmd(reg);
+	if (status != 0)
+		return status;
+
+	reg = readl(&cadence_qspi_base->flashcmdrddatalo);
+
+	/* Put the read value into rx_buf */
+	read_len = (rxlen > 4) ? 4 : rxlen;
+	memcpy(rxbuf, &reg, read_len);
+	rxbuf += read_len;
+
+	/* Bytes 5..8 come from the upper data register. */
+	if (rxlen > 4) {
+		reg = readl(&cadence_qspi_base->flashcmdrddataup);
+
+		read_len = rxlen - read_len;
+		memcpy(rxbuf, &reg, read_len);
+	}
+	return 0;
+}
+
+/*
+ * Execute a command with optional address and up to 8 data bytes.
+ * For commands: WRSR, WREN, WRDI, CHIP_ERASE, BE, etc.
+ *
+ * cmdlen: command length in bytes, 1..5; a length of 4 or 5 means the
+ *         opcode is followed by a 3- or 4-byte address
+ * cmdbuf: opcode followed by the address bytes, if any
+ * txlen:  number of data bytes, 0..8
+ * txbuf:  data bytes; may be NULL only when txlen is 0
+ *
+ * Returns -EINVAL on invalid arguments, otherwise the result of
+ * cadence_qspi_apb_exec_flash_cmd().
+ */
+static int cadence_qspi_apb_command_write(unsigned int cmdlen,
+	const u8 *cmdbuf, unsigned int txlen,  const u8 *txbuf)
+{
+	unsigned int reg = 0;
+	unsigned int addr_value;
+	unsigned int wr_data;
+	unsigned int wr_len;
+
+	if (!cmdlen || cmdlen > 5 || txlen > 8 || cmdbuf == NULL ||
+	    (txlen && txbuf == NULL)) {
+		printf("QSPI: Invalid input arguments cmdlen %d txlen %d\n",
+			cmdlen, txlen);
+		return -EINVAL;
+	}
+
+	/* Shift as unsigned: an opcode >= 0x80 would overflow a signed int. */
+	reg |= (unsigned int)cmdbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+
+	if (cmdlen == 4 || cmdlen == 5) {
+		/* Command with address */
+		reg |= (0x1 << CQSPI_REG_CMDCTRL_ADDR_EN_LSB);
+		/* Address byte count field: address bytes minus one. */
+		reg |= ((cmdlen - 2) & CQSPI_REG_CMDCTRL_ADD_BYTES_MASK)
+			<< CQSPI_REG_CMDCTRL_ADD_BYTES_LSB;
+		/* Get address */
+		addr_value = cadence_qspi_apb_cmd2addr(&cmdbuf[1],
+			cmdlen >= 5 ? 4 : 3);
+
+		writel(addr_value, &cadence_qspi_base->flashcmdaddr);
+	}
+
+	if (txlen) {
+		/* writing data = yes */
+		reg |= (0x1 << CQSPI_REG_CMDCTRL_WR_EN_LSB);
+		reg |= ((txlen - 1) & CQSPI_REG_CMDCTRL_WR_BYTES_MASK)
+			<< CQSPI_REG_CMDCTRL_WR_BYTES_LSB;
+
+		/* Zero the word first so a short copy leaves no stale bytes. */
+		wr_len = txlen > 4 ? 4 : txlen;
+		wr_data = 0;
+		memcpy(&wr_data, txbuf, wr_len);
+		writel(wr_data, &cadence_qspi_base->flashcmdwrdatalo);
+
+		if (txlen > 4) {
+			txbuf += wr_len;
+			wr_len = txlen - wr_len;
+			wr_data = 0;
+			memcpy(&wr_data, txbuf, wr_len);
+			writel(wr_data, &cadence_qspi_base->flashcmdwrdataup);
+		}
+	}
+
+	/* Execute the command */
+	return cadence_qspi_apb_exec_flash_cmd(reg);
+}
+
+/*
+ * Prepare the controller for an indirect read.
+ * Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes)
+ *
+ * ahb_phy_addr: address programmed, after masking, as the indirect trigger
+ * cmdlen:       length of cmdbuf in bytes
+ * cmdbuf:       opcode, then the address bytes, then any dummy bytes
+ *
+ * Writes the indaddrtrig, srampart, indrdstaddr, modebit (only when dummy
+ * bytes are present), devrd and devsz registers. Always returns 0.
+ */
+static int cadence_qspi_apb_indirect_read_setup(unsigned int ahb_phy_addr,
+	unsigned int cmdlen, const u8 *cmdbuf)
+{
+	unsigned int reg;
+	unsigned int rd_reg;
+	unsigned int addr_value;
+	unsigned int dummy_clk;
+	unsigned int dummy_bytes;
+	unsigned int addr_bytes;
+
+	/*
+	 * Identify addr_byte. All NOR flash device drivers are using fast read
+	 * which always expecting 1 dummy byte, 1 cmd byte and 3/4 addr byte.
+	 * With that, the length is in value of 5 or 6. Only FRAM chip from
+	 * ramtron using normal read (which won't need dummy byte).
+	 * Unlikely NOR flash using normal read due to performance issue.
+	 */
+	if (cmdlen >= 5)
+		/* to cater fast read where cmd + addr + dummy */
+		addr_bytes = cmdlen - 2;
+	else
+		/* for normal read (only ramtron as of now) */
+		addr_bytes = cmdlen - 1;
+
+	/* Setup the indirect trigger address */
+	writel((ahb_phy_addr & CQSPI_INDIRECTTRIGGER_ADDR_MASK),
+		&cadence_qspi_base->indaddrtrig);
+
+	/* Configure SRAM partition for read. */
+	writel(CQSPI_REG_SRAM_PARTITION_RD, &cadence_qspi_base->srampart);
+
+	/* Configure the opcode */
+	rd_reg = cmdbuf[0] << CQSPI_REG_RD_INSTR_OPCODE_LSB;
+
+#if (CONFIG_SPI_FLASH_QUAD == 1)
+	/* Instruction and address at DQ0, data at DQ0-3. */
+	rd_reg |= CQSPI_INST_TYPE_QUAD << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
+#endif
+
+	/*
+	 * Get address. cadence_qspi_apb_cmd2addr() reads three bytes unless
+	 * addr_bytes is exactly 4.
+	 */
+	addr_value = cadence_qspi_apb_cmd2addr(&cmdbuf[1], addr_bytes);
+	writel(addr_value, &cadence_qspi_base->indrdstaddr);
+
+	/* The remaining length is dummy bytes. */
+	dummy_bytes = cmdlen - addr_bytes - 1;
+	if (dummy_bytes) {
+
+		if (dummy_bytes > CQSPI_DUMMY_BYTES_MAX)
+			dummy_bytes = CQSPI_DUMMY_BYTES_MAX;
+
+		/* Enable the mode byte; its value depends on the build. */
+		rd_reg |= (1 << CQSPI_REG_RD_INSTR_MODE_EN_LSB);
+#if defined(CONFIG_SPL_SPI_XIP) && defined(CONFIG_SPL_BUILD)
+		writel(0x0, &cadence_qspi_base->modebit);
+#else
+		writel(0xFF, &cadence_qspi_base->modebit);
+#endif
+
+		/* Convert to clock cycles. */
+		dummy_clk = dummy_bytes * CQSPI_DUMMY_CLKS_PER_BYTE;
+		/* Need to minus the mode byte (8 clocks). */
+		dummy_clk -= CQSPI_DUMMY_CLKS_PER_BYTE;
+
+		if (dummy_clk)
+			rd_reg |= (dummy_clk & CQSPI_REG_RD_INSTR_DUMMY_MASK)
+				<< CQSPI_REG_RD_INSTR_DUMMY_LSB;
+	}
+
+	writel(rd_reg, &cadence_qspi_base->devrd);
+
+	/* set device size: the address field holds addr_bytes - 1 */
+	reg = readl(&cadence_qspi_base->devsz);
+	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
+	reg |= (addr_bytes - 1);
+	writel(reg, &cadence_qspi_base->devsz);
+	return 0;
+}
+
+/*
+ * Start an indirect read and copy rxlen bytes from the controller FIFO
+ * into rxbuf.
+ *
+ * @ahb_base_addr: AHB address the FIFO data is read from
+ * @rxlen: number of bytes to transfer
+ * @rxbuf: destination buffer, at least rxlen bytes
+ *
+ * Returns 0 on success. If the FIFO read fails or the controller does not
+ * report completion within CQSPI_REG_RETRY polls, the indirect read is
+ * cancelled and -1 is returned.
+ */
+static int cadence_qspi_apb_indirect_read_execute(void *ahb_base_addr,
+	unsigned int rxlen, u8 *rxbuf)
+{
+	unsigned int reg = 0;
+	unsigned int retry;
+
+	/* Configure the indirect read transfer bytes */
+	writel(rxlen, &cadence_qspi_base->indrdcnt);
+
+	/* Start the indirect read transfer */
+	writel(CQSPI_REG_INDIRECTRD_START_MASK,
+			&cadence_qspi_base->indrd);
+
+	if (qspi_read_sram_fifo_poll((void *)rxbuf,
+				(const void *)ahb_base_addr, rxlen)) {
+		goto failrd;
+	}
+
+	/*
+	 * Check flash indirect controller status. The done bit may lag the
+	 * last FIFO read, so poll for it the same way the indirect write
+	 * path does instead of sampling it only once.
+	 */
+	retry = CQSPI_REG_RETRY;
+	while (retry--) {
+		reg = readl(&cadence_qspi_base->indrd);
+		if (reg & CQSPI_REG_INDIRECTRD_DONE_MASK)
+			break;
+		udelay(1);
+	}
+	if (!(reg & CQSPI_REG_INDIRECTRD_DONE_MASK)) {
+		printf("QSPI: indirect completion status "
+			"error with reg 0x%08x\n", reg);
+		goto failrd;
+	}
+
+	/* Clear indirect completion status */
+	writel(CQSPI_REG_INDIRECTRD_DONE_MASK, &cadence_qspi_base->indrd);
+	return 0;
+
+failrd:
+	/* Cancel the indirect read */
+	writel(CQSPI_REG_INDIRECTRD_CANCEL_MASK, &cadence_qspi_base->indrd);
+	return -1;
+}
+
+/*
+ * Program the controller registers for an indirect write.
+ *
+ * cmdbuf layout: Opcode + Address (3/4 bytes).
+ *
+ * @ahb_phy_addr: address that is masked and written to the indirect
+ *                trigger address register
+ * @cmdlen: number of bytes in cmdbuf, at least 4
+ * @cmdbuf: command bytes; cmdbuf[0] is the opcode
+ *
+ * Returns 0 on success, -EINVAL if cmdbuf is NULL or cmdlen is too short
+ * to hold an opcode and a 3-byte address.
+ */
+static int cadence_qspi_apb_indirect_write_setup(unsigned int ahb_phy_addr,
+	unsigned int cmdlen, const u8 *cmdbuf)
+{
+	unsigned int reg;
+	unsigned int addr_bytes;
+
+	/* Validate the arguments before anything is derived from them */
+	if (cmdlen < 4 || cmdbuf == NULL) {
+		printf("QSPI: Invalid input argument, len %u cmdbuf %p\n",
+			cmdlen, (const void *)cmdbuf);
+		return -EINVAL;
+	}
+
+	/* Anything longer than opcode + 3 bytes uses a 4-byte address */
+	addr_bytes = cmdlen > 4 ? 4 : 3;
+
+	/* Setup the indirect trigger address */
+	writel((ahb_phy_addr & CQSPI_INDIRECTTRIGGER_ADDR_MASK),
+		&cadence_qspi_base->indaddrtrig);
+
+	/* Configure SRAM partition for write. */
+	writel(CQSPI_REG_SRAM_PARTITION_WR,
+		&cadence_qspi_base->srampart);
+
+	/* Configure the opcode */
+	reg = cmdbuf[0] << CQSPI_REG_WR_INSTR_OPCODE_LSB;
+	writel(reg, &cadence_qspi_base->devwr);
+
+	/* Setup write address. */
+	reg = cadence_qspi_apb_cmd2addr(&cmdbuf[1], addr_bytes);
+	writel(reg, &cadence_qspi_base->indwrstaddr);
+
+	/* set device size: the field is written as address bytes minus one */
+	reg = readl(&cadence_qspi_base->devsz);
+	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
+	reg |= (addr_bytes - 1);
+	writel(reg, &cadence_qspi_base->devsz);
+	return 0;
+}
+
+/*
+ * Start an indirect write, push txlen bytes from txbuf into the controller
+ * FIFO and wait for the controller to finish.
+ *
+ * Returns 0 on success. On a FIFO push failure, a FIFO drain timeout or a
+ * missing completion flag the indirect write is cancelled and -1 returned.
+ */
+static int cadence_qspi_apb_indirect_write_execute(void *ahb_base_addr,
+	unsigned int txlen, const u8 *txbuf)
+{
+	unsigned int level = 0;
+	unsigned int status = 0;
+	unsigned int attempts;
+
+	/* Program the write byte count, then kick off the transfer */
+	writel(txlen, &cadence_qspi_base->indwrcnt);
+	writel(CQSPI_REG_INDIRECTWR_START_MASK,	&cadence_qspi_base->indwr);
+
+	if (qpsi_write_sram_fifo_push(ahb_base_addr,
+		(const void *)txbuf, txlen))
+		goto cancel;
+
+	/* The last write is complete once the write FIFO level reads zero */
+	for (attempts = CQSPI_REG_RETRY; attempts != 0; attempts--) {
+		level = CQSPI_GET_WR_SRAM_LEVEL();
+		if (!level)
+			break;
+
+		udelay(1);
+	}
+	if (level) {
+		printf("QSPI: timeout for indirect write\n");
+		goto cancel;
+	}
+
+	/* Poll the indirect write register for the done flag */
+	for (attempts = CQSPI_REG_RETRY; attempts != 0; attempts--) {
+		status = readl(&cadence_qspi_base->indwr);
+		if (status & CQSPI_REG_INDIRECTWR_DONE_MASK)
+			break;
+		udelay(1);
+	}
+	if ((status & CQSPI_REG_INDIRECTWR_DONE_MASK) == 0) {
+		printf("QSPI: indirect completion "
+			"status error with reg 0x%08x\n", status);
+		goto cancel;
+	}
+
+	/* Writing the done flag back clears the completion status */
+	writel(CQSPI_REG_INDIRECTWR_DONE_MASK, &cadence_qspi_base->indwr);
+	return 0;
+
+cancel:
+	/* Abort the indirect write that is still pending */
+	writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK, &cadence_qspi_base->indwr);
+	return -1;
+}
+
+/*
+ * Switch the controller to XiP: enable it in direct mode with immediate
+ * XiP entry, program xip_dummy as the mode byte and turn on mode-byte
+ * transmission for reads.
+ */
+static void cadence_qspi_apb_enter_xip(char xip_dummy)
+{
+	unsigned int cfg;
+	unsigned int devrd;
+
+	/* Controller on, direct access on, enter XiP immediately */
+	cfg = readl(&cadence_qspi_base->cfg);
+	cfg |= CQSPI_REG_CONFIG_ENABLE_MASK |
+		CQSPI_REG_CONFIG_DIRECT_MASK |
+		CQSPI_REG_CONFIG_XIP_IMM_MASK;
+	writel(cfg, &cadence_qspi_base->cfg);
+
+	/* Mode byte value that keeps XiP mode */
+	writel(xip_dummy, &cadence_qspi_base->modebit);
+
+	/* Have read instructions send the mode byte */
+	devrd = readl(&cadence_qspi_base->devrd);
+	devrd |= (1 << CQSPI_REG_RD_INSTR_MODE_EN_LSB);
+	writel(devrd, &cadence_qspi_base->devrd);
+}
+
+/*
+ * Set the SPI clock to hz: program the baud rate divider from the
+ * reference clock and recompute the chip-select delay timings, which
+ * depend on the selected speed.
+ */
+void spi_set_speed(struct spi_slave *slave, uint hz)
+{
+	/* Divider first ... */
+	cadence_qspi_apb_config_baudrate_div(CONFIG_CQSPI_REF_CLK, hz);
+
+	/* ... then the delay timing, which must follow any speed change. */
+	cadence_qspi_apb_delay(CONFIG_CQSPI_REF_CLK, hz,
+		CONFIG_CQSPI_TSHSL_NS, CONFIG_CQSPI_TSD2D_NS,
+		CONFIG_CQSPI_TCHSH_NS, CONFIG_CQSPI_TSLCH_NS);
+}
+
+/*
+ * Calibration sequence to determine the read data capture delay register.
+ *
+ * The ID (opcode 0x9F) is read once at 1 MHz with a capture delay of 0 and
+ * kept as the golden value. The clock is then set to the slave's max_hz and
+ * every capture delay from 0 to CQSPI_READ_CAPTURE_MAX_DELAY - 1 is tried
+ * until the first contiguous window of delays returning the golden value
+ * has been found. The middle of that window is programmed.
+ *
+ * On success the controller is left disabled and the calibrated speed and
+ * chip select are recorded so the sequence is not repeated needlessly.
+ *
+ * Returns 0 on success, the error from the ID read if a read fails, or -1
+ * if no capture delay reproduces the golden value.
+ */
+int spi_calibration(struct spi_slave *slave)
+{
+	struct cadence_qspi_slave *cadence_qspi = to_cadence_qspi_slave(slave);
+	u8 opcode_rdid = 0x9F;
+	unsigned int idcode = 0, temp = 0;
+	int err = 0, i, range_lo = -1, range_hi = -1;
+
+	/* start with slowest clock (1 MHz) */
+	spi_set_speed(slave, 1000000);
+
+	/* configure the read data capture delay register to 0 */
+	cadence_qspi_apb_readdata_capture(1, 0);
+
+	/* Enable QSPI */
+	cadence_qspi_apb_controller_enable();
+
+	/* read the ID which will be our golden value */
+	err = cadence_qspi_apb_command_read(1, &opcode_rdid,
+		3, (u8 *)&idcode);
+	if (err) {
+		puts("SF: Calibration failed (read)\n");
+		return err;
+	}
+
+	/* use back the intended clock and find the working delay window */
+	spi_set_speed(slave, cadence_qspi->max_hz);
+	for (i = 0; i < CQSPI_READ_CAPTURE_MAX_DELAY; i++) {
+		/* Disable QSPI */
+		cadence_qspi_apb_controller_disable();
+
+		/* reconfigure the read data capture delay register */
+		cadence_qspi_apb_readdata_capture(1, i);
+
+		/* Enable back QSPI */
+		cadence_qspi_apb_controller_enable();
+
+		/* issue a RDID to get the ID value */
+		err = cadence_qspi_apb_command_read(1, &opcode_rdid,
+			3, (u8 *)&temp);
+		if (err) {
+			puts("SF: Calibration failed (read)\n");
+			return err;
+		}
+
+		if (temp == idcode) {
+			/*
+			 * The first match opens the window and every match
+			 * extends it, so range_hi is valid even when the
+			 * first match happens on the last iteration.
+			 */
+			if (range_lo == -1)
+				range_lo = i;
+			range_hi = i;
+		} else if (range_lo != -1) {
+			/* first mismatch after a match closes the window */
+			break;
+		}
+	}
+
+	if (range_lo == -1) {
+		puts("SF: Calibration failed (low range)\n");
+		/* err is 0 here (all reads succeeded); report the failure */
+		return -1;
+	}
+
+	/* Disable QSPI for subsequent initialization */
+	cadence_qspi_apb_controller_disable();
+
+	/* configure the final value for read data capture delay register */
+	cadence_qspi_apb_readdata_capture(1, (range_hi + range_lo) / 2);
+	printf("SF: Read data capture delay calibrated to %i (%i - %i)\n",
+		(range_hi + range_lo) / 2, range_lo, range_hi);
+
+	/* just to ensure we do once only when speed or chip select change */
+	qspi_calibrated_hz = cadence_qspi->max_hz;
+	qspi_calibrated_cs = slave->cs;
+	return 0;
+}
+
+/*
+ * Check a bus / chip-select pair. Both numbers must be below the
+ * chip-select limit, which depends on whether the external decoder is
+ * configured. Returns 1 if valid; otherwise prints a message and
+ * returns 0.
+ */
+int spi_cs_is_valid(unsigned int bus, unsigned int cs)
+{
+#if (CONFIG_CQSPI_DECODER == 1)
+	const unsigned int limit = CQSPI_DECODER_MAX_CS;
+#else
+	const unsigned int limit = CQSPI_NO_DECODER_MAX_CS;
+#endif
+
+	/* Both values are unsigned, so only the upper bound needs testing */
+	if (cs >= limit || bus >= limit) {
+		printf("QSPI: Invalid bus or cs. Bus %d cs %d\n", bus, cs);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Chip-select activation hook; this driver has nothing to do here. */
+void spi_cs_activate(struct spi_slave *slave)
+{
+}
+
+/* Chip-select deactivation hook; this driver has nothing to do here. */
+void spi_cs_deactivate(struct spi_slave *slave)
+{
+}
+
+/* Initialise the controller and record that this has been done. */
+void spi_init(void)
+{
+	cadence_qspi_apb_controller_init();
+
+	/* spi_setup_slave() checks this flag to avoid a second init */
+	qspi_is_init = 1;
+}
+
+/*
+ * Allocate and fill in the per-slave state for the given bus / chip select.
+ *
+ * @bus: bus number, checked by spi_cs_is_valid()
+ * @cs: chip select, checked by spi_cs_is_valid()
+ * @max_hz: SPI clock to use for this slave
+ * @mode: SPI mode flags (SPI_CPOL / SPI_CPHA are read in spi_claim_bus())
+ *
+ * The controller is initialised on first use. Returns the embedded
+ * struct spi_slave, or NULL if bus/cs is invalid or allocation fails. The
+ * result is released with spi_free_slave().
+ */
+struct spi_slave *spi_setup_slave(unsigned int bus, unsigned int cs,
+		unsigned int max_hz, unsigned int mode)
+{
+	struct cadence_qspi_slave *cadence_qspi;
+
+	debug("%s: bus %d cs %d max_hz %dMHz mode %d\n", __func__,
+		bus, cs, max_hz/1000000, mode);
+
+	if (!spi_cs_is_valid(bus, cs))
+		return NULL;
+
+	cadence_qspi = malloc(sizeof(*cadence_qspi));
+	if (!cadence_qspi) {
+		printf("QSPI: Can't allocate struct cadence_qspi_slave. "
+			"Bus %d cs %d\n", bus, cs);
+		return NULL;
+	}
+
+	/*
+	 * malloc() memory is uninitialised. spi_xfer() reads cmd_len and
+	 * cmd_buf before anything is guaranteed to have written them, so
+	 * start from an all-zero structure (empty command).
+	 */
+	memset(cadence_qspi, 0, sizeof(*cadence_qspi));
+
+	cadence_qspi->slave.bus = bus;
+	cadence_qspi->slave.cs = cs;
+	cadence_qspi->mode = mode;
+	cadence_qspi->max_hz = max_hz;
+	cadence_qspi->regbase = (void *)QSPI_BASE;
+	cadence_qspi->ahbbase = (void *)QSPI_AHB_BASE;
+
+	if (!qspi_is_init)
+		spi_init();
+
+	return &cadence_qspi->slave;
+}
+
+/* Release the per-slave state allocated by spi_setup_slave(). */
+void spi_free_slave(struct spi_slave *slave)
+{
+	/* The spi_slave is embedded; free the containing structure */
+	free(to_cadence_qspi_slave(slave));
+}
+
+/*
+ * Configure the controller for this slave: chip select, clock mode and
+ * clock speed are programmed while the controller is disabled, and the
+ * read capture delay is calibrated when the speed or chip select differs
+ * from the last calibration. The controller is enabled on success.
+ *
+ * Returns 0 on success or the error reported by spi_calibration().
+ */
+int spi_claim_bus(struct spi_slave *slave)
+{
+	struct cadence_qspi_slave *qslave = to_cadence_qspi_slave(slave);
+	unsigned int cpol = 0;
+	unsigned int cpha = 0;
+	int ret;
+
+	if (qslave->mode & SPI_CPOL)
+		cpol = 1;
+	if (qslave->mode & SPI_CPHA)
+		cpha = 1;
+
+	debug("%s: bus:%i cs:%i\n", __func__, slave->bus, slave->cs);
+
+	/* Settings below are changed with the controller switched off */
+	cadence_qspi_apb_controller_disable();
+
+	/* Chip select, with or without the external decoder */
+	cadence_qspi_apb_chipselect(slave->cs, CONFIG_CQSPI_DECODER);
+
+	/* Clock polarity and phase */
+	cadence_qspi_apb_set_clk_mode(cpol, cpha);
+
+	/* Clock speed and the timings derived from it */
+	spi_set_speed(slave, qslave->max_hz);
+
+	/* A new SCLK speed or chip select needs a fresh calibration */
+	if (qspi_calibrated_hz != qslave->max_hz ||
+	    qspi_calibrated_cs != slave->cs) {
+		ret = spi_calibration(slave);
+		if (ret)
+			return ret;
+	}
+
+	/* Ready for transfers */
+	cadence_qspi_apb_controller_enable();
+
+	return 0;
+}
+
+/* Bus release hook; this driver has nothing to undo here. */
+void spi_release_bus(struct spi_slave *slave)
+{
+}
+
+/*
+ * Perform one phase of a SPI transaction.
+ *
+ * A call with SPI_XFER_BEGIN carries the command (opcode, address, dummy
+ * bytes) in data_out; it is stored in the slave's command buffer. The
+ * stored command is issued on a call with SPI_XFER_END, or with no flags
+ * at all, together with that call's data:
+ *  - data_in with a non-zero length selects a read,
+ *  - data_out on a call without SPI_XFER_BEGIN selects a write,
+ *  - anything else issues the command alone through the STIG write path.
+ * A command of a single byte (no address) uses the STIG path, longer
+ * commands use the indirect path. SPI_XFER_END clears the stored command.
+ *
+ * @slave: slave obtained from spi_setup_slave()
+ * @bitlen: length of this phase in bits; only whole bytes are used
+ * @data_out: command bytes (with SPI_XFER_BEGIN) or data to write
+ * @data_in: buffer for read data, or NULL
+ * @flags: combination of SPI_XFER_BEGIN and SPI_XFER_END
+ *
+ * Returns 0 on success or when nothing is issued yet, -1 if the command is
+ * empty, missing or does not fit the command buffer, otherwise the error
+ * from the low-level transfer routine.
+ */
+int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *data_out,
+		void *data_in, unsigned long flags)
+{
+	struct cadence_qspi_slave *cadence_qspi = to_cadence_qspi_slave(slave);
+	void *ahbbase = cadence_qspi->ahbbase;
+	u8 *cmd_buf = cadence_qspi->cmd_buf;
+	size_t data_bytes;
+	int err = 0;
+	u32 mode = CQSPI_STIG_WRITE;
+
+	if (flags & SPI_XFER_BEGIN) {
+		size_t cmd_len = bitlen / 8;
+
+		/*
+		 * The command is copied into a fixed-size buffer; refuse
+		 * anything that would overflow it or has no source data.
+		 */
+		if (cmd_len > sizeof(cadence_qspi->cmd_buf) ||
+		    (cmd_len && !data_out)) {
+			printf("QSPI: Error, invalid command (len %u).\n",
+				(unsigned int)cmd_len);
+			/* do not leave a stale command behind */
+			cadence_qspi->cmd_len = 0;
+			return -1;
+		}
+
+		/* copy command to local buffer */
+		cadence_qspi->cmd_len = cmd_len;
+		if (cmd_len)
+			memcpy(cmd_buf, data_out, cmd_len);
+	}
+
+	if (flags == (SPI_XFER_BEGIN | SPI_XFER_END)) {
+		/* if start and end bit are set, the data bytes is 0. */
+		data_bytes = 0;
+	} else {
+		data_bytes = bitlen / 8;
+	}
+
+	if ((flags & SPI_XFER_END) || (flags == 0)) {
+		if (cadence_qspi->cmd_len == 0) {
+			printf("QSPI: Error, command is empty.\n");
+			return -1;
+		}
+
+		if (data_in && data_bytes) {
+			/* read */
+			/* Use STIG if no address. */
+			if (!CQSPI_IS_ADDR(cadence_qspi->cmd_len))
+				mode = CQSPI_STIG_READ;
+			else
+				mode = CQSPI_INDIRECT_READ;
+		} else if (data_out && !(flags & SPI_XFER_BEGIN)) {
+			/* write */
+			if (!CQSPI_IS_ADDR(cadence_qspi->cmd_len))
+				mode = CQSPI_STIG_WRITE;
+			else
+				mode = CQSPI_INDIRECT_WRITE;
+		}
+
+		switch (mode) {
+		case CQSPI_STIG_READ:
+			err = cadence_qspi_apb_command_read(
+				cadence_qspi->cmd_len, cmd_buf,
+				data_bytes, data_in);
+			break;
+		case CQSPI_STIG_WRITE:
+			err = cadence_qspi_apb_command_write(
+				cadence_qspi->cmd_len, cmd_buf,
+				data_bytes, data_out);
+			break;
+		case CQSPI_INDIRECT_READ:
+			err = cadence_qspi_apb_indirect_read_setup(
+				QSPI_AHB_BASE,
+				cadence_qspi->cmd_len, cmd_buf);
+			if (!err) {
+				err = cadence_qspi_apb_indirect_read_execute
+				(ahbbase, data_bytes, data_in);
+			}
+			break;
+		case CQSPI_INDIRECT_WRITE:
+			err = cadence_qspi_apb_indirect_write_setup
+				(QSPI_AHB_BASE,
+				cadence_qspi->cmd_len, cmd_buf);
+			if (!err) {
+				err = cadence_qspi_apb_indirect_write_execute
+				(ahbbase, data_bytes, data_out);
+			}
+			break;
+		default:
+			err = -1;
+			break;
+		}
+
+		if (flags & SPI_XFER_END) {
+			/* clear command buffer */
+			memset(cmd_buf, 0, sizeof(cadence_qspi->cmd_buf));
+			cadence_qspi->cmd_len = 0;
+		}
+	}
+	return err;
+}
+
+/*
+ * Put the controller into XiP mode, using xip_dummy as the mode byte.
+ * The slave argument is not used.
+ */
+void spi_enter_xip(struct spi_slave *slave, char xip_dummy)
+{
+	cadence_qspi_apb_enter_xip(xip_dummy);
+}
+
+
diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
new file mode 100644
index 0000000..346837e
--- /dev/null
+++ b/drivers/spi/cadence_qspi.h
@@ -0,0 +1,196 @@ 
+/*
+ * (C) Copyright 2014 Altera Corporation <www.altera.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef __CADENCE_QSPI_H__
+#define __CADENCE_QSPI_H__
+
+/*
+ * Macro required for this driver
+ *
+ * #define CONFIG_CQSPI_BASE		(SOCFPGA_QSPIREGS_ADDRESS)
+ * #define CONFIG_CQSPI_AHB_BASE		(SOCFPGA_QSPIDATA_ADDRESS)
+ * -> To specify base address for controller CSR base and AHB data base addr
+ *
+ * #define CONFIG_CQSPI_REF_CLK		(400000000)
+ * -> The clock frequency supplied from PLL to the QSPI controller
+ *
+ * #define CONFIG_CQSPI_PAGE_SIZE		(256)
+ * -> To define the page size of serial flash in bytes
+ *
+ * #define CONFIG_CQSPI_BLOCK_SIZE		(16)
+ * -> To define the block size of serial flash in pages
+ *
+ * #define CONFIG_CQSPI_DECODER		(0)
+ * -> Set to 1 to enable the 4-to-16 decoder (up to 16 serial flash devices)
+ *
+ * #define CONFIG_CQSPI_TSHSL_NS		(200)
+ * #define CONFIG_CQSPI_TSD2D_NS		(255)
+ * #define CONFIG_CQSPI_TCHSH_NS		(20)
+ * #define CONFIG_CQSPI_TSLCH_NS		(20)
+ * -> Configure controller based on serial flash device timing characteristic
+ */
+
+/* Controller register base and AHB data window, from the board config */
+#define QSPI_BASE				(CONFIG_CQSPI_BASE)
+#define QSPI_AHB_BASE				(CONFIG_CQSPI_AHB_BASE)
+/*
+ * 1 if a command of cmd_len bytes carries more than the opcode (i.e. an
+ * address follows), 0 otherwise. The argument is parenthesized so that
+ * expressions such as "a & b" or "x ? y : z" are evaluated as a whole.
+ */
+#define CQSPI_IS_ADDR(cmd_len)			((cmd_len) > 1 ? 1 : 0)
+
+/*
+ * Per-slave driver state. The generic struct spi_slave is embedded as a
+ * member and handed out to callers; the driver gets back to this
+ * structure with to_cadence_qspi_slave().
+ */
+struct cadence_qspi_slave {
+	struct spi_slave slave;		/* generic part, holds bus and cs */
+	unsigned int	mode;		/* SPI mode flags (SPI_CPOL/SPI_CPHA) */
+	unsigned int	max_hz;		/* SPI clock used for this slave */
+	void		*regbase;	/* set to QSPI_BASE at setup */
+	void		*ahbbase;	/* set to QSPI_AHB_BASE at setup */
+	size_t		cmd_len;	/* valid bytes in cmd_buf, 0 = none */
+	u8		cmd_buf[32];	/* command saved by spi_xfer() */
+	size_t		data_len;	/* NOTE(review): no visible user */
+};
+
+/*
+ * Controller register layout, one u32 per register. The offsets in the
+ * comments are relative to the start of the structure and follow from the
+ * member order and the padding arrays.
+ */
+struct cadence_qspi {
+	u32	cfg;			/* 0x00 */
+	u32	devrd;			/* 0x04 */
+	u32	devwr;			/* 0x08 */
+	u32	delay;			/* 0x0c */
+	u32	rddatacap;		/* 0x10 */
+	u32	devsz;			/* 0x14 */
+	u32	srampart;		/* 0x18 */
+	u32	indaddrtrig;		/* 0x1c */
+	u32	dmaper;			/* 0x20 */
+	u32	remapaddr;		/* 0x24 */
+	u32	modebit;		/* 0x28 */
+	u32	sramfill;		/* 0x2c */
+	u32	txthresh;		/* 0x30 */
+	u32	rxthresh;		/* 0x34 */
+	u32	_pad_0x38_0x3f[2];
+	u32	irqstat;		/* 0x40 */
+	u32	irqmask;		/* 0x44 */
+	u32	_pad_0x48_0x4f[2];
+	u32	lowwrprot;		/* 0x50 */
+	u32	uppwrprot;		/* 0x54 */
+	u32	wrprot;			/* 0x58 */
+	u32	_pad_0x5c_0x5f;
+	u32	indrd;			/* 0x60 */
+	u32	indrdwater;		/* 0x64 */
+	u32	indrdstaddr;		/* 0x68 */
+	u32	indrdcnt;		/* 0x6c */
+	u32	indwr;			/* 0x70 */
+	u32	indwrwater;		/* 0x74 */
+	u32	indwrstaddr;		/* 0x78 */
+	u32	indwrcnt;		/* 0x7c */
+	u32	_pad_0x80_0x8f[4];
+	u32	flashcmd;		/* 0x90 */
+	u32	flashcmdaddr;		/* 0x94 */
+	u32	_pad_0x98_0x9f[2];
+	u32	flashcmdrddatalo;	/* 0xa0 */
+	u32	flashcmdrddataup;	/* 0xa4 */
+	u32	flashcmdwrdatalo;	/* 0xa8 */
+	u32	flashcmdwrdataup;	/* 0xac */
+	u32	_pad_0xb0_0xfb[19];
+	u32	moduleid;		/* 0xfc */
+};
+
+/*
+ * Register field definitions. *_LSB is the bit position of a field,
+ * *_MASK either a single-bit mask (written as 1 << n) or the unshifted
+ * field mask that is applied before shifting by the matching *_LSB.
+ */
+
+/* Controller's configuration and status register (cfg) */
+#define	CQSPI_REG_CONFIG_CLK_POL_LSB		1
+#define	CQSPI_REG_CONFIG_CLK_PHA_LSB		2
+#define	CQSPI_REG_CONFIG_ENABLE_MASK		(1 << 0)
+#define	CQSPI_REG_CONFIG_DIRECT_MASK		(1 << 7)
+#define	CQSPI_REG_CONFIG_DECODE_MASK		(1 << 9)
+#define	CQSPI_REG_CONFIG_XIP_IMM_MASK		(1 << 18)
+#define	CQSPI_REG_CONFIG_CHIPSELECT_LSB		10
+#define	CQSPI_REG_CONFIG_BAUD_LSB		19
+#define	CQSPI_REG_CONFIG_IDLE_LSB		31
+#define	CQSPI_REG_CONFIG_CHIPSELECT_MASK	0xF
+#define	CQSPI_REG_CONFIG_BAUD_MASK		0xF
+/* Device read instruction register (devrd) */
+#define	CQSPI_REG_RD_INSTR_OPCODE_LSB		0
+#define	CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB	8
+#define	CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB	12
+#define	CQSPI_REG_RD_INSTR_TYPE_DATA_LSB	16
+#define	CQSPI_REG_RD_INSTR_MODE_EN_LSB		20
+#define	CQSPI_REG_RD_INSTR_DUMMY_LSB		24
+#define	CQSPI_REG_RD_INSTR_TYPE_INSTR_MASK	0x3
+#define	CQSPI_REG_RD_INSTR_TYPE_ADDR_MASK	0x3
+#define	CQSPI_REG_RD_INSTR_TYPE_DATA_MASK	0x3
+#define	CQSPI_REG_RD_INSTR_DUMMY_MASK		0x1F
+/* Device write instruction register (devwr) */
+#define	CQSPI_REG_WR_INSTR_OPCODE_LSB		0
+/* Delay fields, one byte each - presumably the delay register */
+#define	CQSPI_REG_DELAY_TSLCH_LSB		0
+#define	CQSPI_REG_DELAY_TCHSH_LSB		8
+#define	CQSPI_REG_DELAY_TSD2D_LSB		16
+#define	CQSPI_REG_DELAY_TSHSL_LSB		24
+#define	CQSPI_REG_DELAY_TSLCH_MASK		0xFF
+#define	CQSPI_REG_DELAY_TCHSH_MASK		0xFF
+#define	CQSPI_REG_DELAY_TSD2D_MASK		0xFF
+#define	CQSPI_REG_DELAY_TSHSL_MASK		0xFF
+/* Read data capture fields - presumably the rddatacap register */
+#define	CQSPI_REG_RD_DATA_CAPTURE_BYPASS_LSB	0
+#define	CQSPI_REG_RD_DATA_CAPTURE_DELAY_LSB	1
+#define	CQSPI_REG_RD_DATA_CAPTURE_DELAY_MASK	0xF
+/* Device size register (devsz) */
+#define	CQSPI_REG_SIZE_ADDRESS_LSB		0
+#define	CQSPI_REG_SIZE_PAGE_LSB			4
+#define	CQSPI_REG_SIZE_BLOCK_LSB		16
+#define	CQSPI_REG_SIZE_ADDRESS_MASK		0xF
+#define	CQSPI_REG_SIZE_PAGE_MASK		0xFFF
+#define	CQSPI_REG_SIZE_BLOCK_MASK		0x3F
+/* SRAM fill level fields - presumably the sramfill register */
+#define	CQSPI_REG_SRAMLEVEL_RD_LSB		0
+#define	CQSPI_REG_SRAMLEVEL_WR_LSB		16
+#define	CQSPI_REG_SRAMLEVEL_RD_MASK		0xFFFF
+#define	CQSPI_REG_SRAMLEVEL_WR_MASK		0xFFFF
+/* Indirect read transfer control register (indrd) */
+#define	CQSPI_REG_INDIRECTRD_START_MASK		(1 << 0)
+#define	CQSPI_REG_INDIRECTRD_CANCEL_MASK	(1 << 1)
+#define	CQSPI_REG_INDIRECTRD_INPROGRESS_MASK	(1 << 2)
+#define	CQSPI_REG_INDIRECTRD_DONE_MASK		(1 << 5)
+/* Flash command control fields - presumably the flashcmd register */
+#define	CQSPI_REG_CMDCTRL_EXECUTE_MASK		(1 << 0)
+#define	CQSPI_REG_CMDCTRL_INPROGRESS_MASK	(1 << 1)
+#define	CQSPI_REG_CMDCTRL_DUMMY_LSB		7
+#define	CQSPI_REG_CMDCTRL_WR_BYTES_LSB		12
+#define	CQSPI_REG_CMDCTRL_WR_EN_LSB		15
+#define	CQSPI_REG_CMDCTRL_ADD_BYTES_LSB		16
+#define	CQSPI_REG_CMDCTRL_ADDR_EN_LSB		19
+#define	CQSPI_REG_CMDCTRL_RD_BYTES_LSB		20
+#define	CQSPI_REG_CMDCTRL_RD_EN_LSB		23
+#define	CQSPI_REG_CMDCTRL_OPCODE_LSB		24
+#define	CQSPI_REG_CMDCTRL_DUMMY_MASK		0x1F
+#define	CQSPI_REG_CMDCTRL_WR_BYTES_MASK		0x7
+#define	CQSPI_REG_CMDCTRL_ADD_BYTES_MASK	0x3
+#define	CQSPI_REG_CMDCTRL_RD_BYTES_MASK		0x7
+#define	CQSPI_REG_CMDCTRL_OPCODE_MASK		0xFF
+/* Indirect write transfer control register (indwr) */
+#define	CQSPI_REG_INDIRECTWR_START_MASK		(1 << 0)
+#define	CQSPI_REG_INDIRECTWR_CANCEL_MASK	(1 << 1)
+#define	CQSPI_REG_INDIRECTWR_INPROGRESS_MASK	(1 << 2)
+#define	CQSPI_REG_INDIRECTWR_DONE_MASK		(1 << 5)
+
+/* Transfer type, selected per transaction in spi_xfer() */
+#define CQSPI_STIG_READ				0
+#define CQSPI_STIG_WRITE			1
+#define CQSPI_INDIRECT_READ			2
+#define CQSPI_INDIRECT_WRITE			3
+
+/* Transfer mode (value for the instruction/address/data type fields) */
+#define CQSPI_INST_TYPE_SINGLE			(0)
+#define CQSPI_INST_TYPE_DUAL			(1)
+#define CQSPI_INST_TYPE_QUAD			(2)
+
+/* controller operation setting */
+/* exclusive upper bound for chip select without / with the decoder */
+#define CQSPI_NO_DECODER_MAX_CS			(4)
+#define CQSPI_DECODER_MAX_CS			(16)
+/* number of capture delay values tried by the calibration */
+#define CQSPI_READ_CAPTURE_MAX_DELAY		(16)
+#define CQSPI_REG_POLL_US			(1)
+/* iteration count of the register polling loops */
+#define CQSPI_REG_RETRY				(10000)
+#define CQSPI_POLL_IDLE_RETRY			(3)
+#define CQSPI_FIFO_WIDTH			(4)
+#define CQSPI_STIG_DATA_LEN_MAX			(8)
+/* applied to the AHB address before it is written to indaddrtrig */
+#define CQSPI_INDIRECTTRIGGER_ADDR_MASK		(0xFFFFF)
+/* dummy bytes are converted to clocks with this factor, capped at MAX */
+#define CQSPI_DUMMY_CLKS_PER_BYTE		(8)
+#define CQSPI_DUMMY_BYTES_MAX			(4)
+
+/* Controller sram size in word */
+#define CQSPI_REG_SRAM_SIZE_WORD		(128)
+#define CQSPI_REG_SRAM_RESV_WORDS		(2)
+/* values written to srampart before an indirect write / read */
+#define CQSPI_REG_SRAM_PARTITION_WR		(1)
+#define CQSPI_REG_SRAM_PARTITION_RD		\
+	(CQSPI_REG_SRAM_SIZE_WORD - CQSPI_REG_SRAM_RESV_WORDS)
+#define CQSPI_REG_SRAM_THRESHOLD_WORDS		(50)
+/* half of the SRAM, expressed in bytes */
+#define CQSPI_REG_SRAM_FILL_THRESHOLD	\
+	((CQSPI_REG_SRAM_SIZE_WORD / 2) * CQSPI_FIFO_WIDTH)
+
+#endif /* __CADENCE_QSPI_H__ */