[U-Boot,2/2] spi: cadence-qspi: Add direct mode support
diff mbox series

Message ID 20191014132752.18534-3-vigneshr@ti.com
State Changes Requested
Delegated to: Jagannadha Sutradharudu Teki
Headers show
Series
  • spi: cadence-qspi: Move to spi-mem APIs
Related show

Commit Message

Vignesh Raghavendra Oct. 14, 2019, 1:27 p.m. UTC
Add support for Direct Access Controller mode of Cadence QSPI. This
allows MMIO access to SPI NOR flash providing better read performance.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
 drivers/spi/cadence_qspi.h     | 19 ++++++-----
 drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
 3 files changed, 87 insertions(+), 33 deletions(-)

Comments

Simon Goldschmidt Oct. 17, 2019, 11:39 a.m. UTC | #1
On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>
> Add support for Direct Access Controller mode of Cadence QSPI. This
> allows MMIO access to SPI NOR flash providing better read performance.
>
> Signed-off-by: Vignesh R <vigneshr@ti.com>
> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>

I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
50MHz) and it seems to work fine.

However, I had the impression it was a bit slower, not faster, although I
haven't measured, and running at 50MHz with 4 data lines, reading the whole
flash takes about 1.5 seconds only, so without actually measureing it, it's
hard to tell if this performance is really worth the 440 bytes of extra code
it adds?

Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
for me :-)

Regards,
Simon


> ---
>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
>  3 files changed, 87 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
> index 673a2e9a6c4c..6c98cbf39ae4 100644
> --- a/drivers/spi/cadence_qspi.c
> +++ b/drivers/spi/cadence_qspi.c
> @@ -12,12 +12,13 @@
>  #include <spi.h>
>  #include <spi-mem.h>
>  #include <linux/errno.h>
> +#include <linux/sizes.h>
>  #include "cadence_qspi.h"
>
>  #define CQSPI_STIG_READ                        0
>  #define CQSPI_STIG_WRITE               1
> -#define CQSPI_INDIRECT_READ            2
> -#define CQSPI_INDIRECT_WRITE           3
> +#define CQSPI_READ                     2
> +#define CQSPI_WRITE                    3
>
>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
>  {
> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
>
>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>  {
> +       struct cadence_spi_platdata *plat = bus->platdata;
>         struct cadence_spi_priv *priv = dev_get_priv(bus);
>
>         /* Disable QSPI */
> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>         /* Set SPI mode */
>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
>
> +       /* Enable Direct Access Controller */
> +       if (plat->use_dac_mode)
> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
> +
>         /* Enable QSPI */
>         cadence_qspi_apb_controller_enable(priv->regbase);
>
> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>                 if (!op->addr.nbytes)
>                         mode = CQSPI_STIG_READ;
>                 else
> -                       mode = CQSPI_INDIRECT_READ;
> +                       mode = CQSPI_READ;
>         } else {
>                 if (!op->addr.nbytes || !op->data.buf.out)
>                         mode = CQSPI_STIG_WRITE;
>                 else
> -                       mode = CQSPI_INDIRECT_WRITE;
> +                       mode = CQSPI_WRITE;
>         }
>
>         switch (mode) {
> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>         case CQSPI_STIG_WRITE:
>                 err = cadence_qspi_apb_command_write(base, op);
>                 break;
> -       case CQSPI_INDIRECT_READ:
> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
> -               if (!err) {
> -                       err = cadence_qspi_apb_indirect_read_execute
> -                               (plat, op->data.nbytes, op->data.buf.in);
> -               }
> +       case CQSPI_READ:
> +               err = cadence_qspi_apb_read_setup(plat, op);
> +               if (!err)
> +                       err = cadence_qspi_apb_read_execute(plat, op);
>                 break;
> -       case CQSPI_INDIRECT_WRITE:
> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
> -               if (!err) {
> -                       err = cadence_qspi_apb_indirect_write_execute
> -                       (plat, op->data.nbytes, op->data.buf.out);
> -               }
> +       case CQSPI_WRITE:
> +               err = cadence_qspi_apb_write_setup(plat, op);
> +               if (!err)
> +                       err = cadence_qspi_apb_write_execute(plat, op);
>                 break;
>         default:
>                 err = -1;
> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
>         ofnode subnode;
>
>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
> +                       &plat->ahbsize);
>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
>         plat->trigger_address = dev_read_u32_default(bus,
>                                                      "cdns,trigger-address",
>                                                      0);
> +       /* Use DAC mode only when MMIO window is at least 8M wide */
> +       if (plat->ahbsize >= SZ_8M)
> +               plat->use_dac_mode = true;
>
>         /* All other paramters are embedded in the child node */
>         subnode = dev_read_first_subnode(bus);
> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
> index e655b027d788..619f0bed8efd 100644
> --- a/drivers/spi/cadence_qspi.h
> +++ b/drivers/spi/cadence_qspi.h
> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
>         u32             fifo_depth;
>         u32             fifo_width;
>         u32             trigger_address;
> +       fdt_addr_t      ahbsize;
> +       bool            use_dac_mode;
>
>         /* Flash parameters */
>         u32             page_size;
> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
>
>  int cadence_qspi_apb_command_read(void *reg_base_addr,
>                                   const struct spi_mem_op *op);
>  int cadence_qspi_apb_command_write(void *reg_base_addr,
>                                    const struct spi_mem_op *op);
>
> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> -       const struct spi_mem_op *op);
> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> -       unsigned int rxlen, u8 *rxbuf);
> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> -       const struct spi_mem_op *op);
> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> -       unsigned int txlen, const u8 *txbuf);
> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> +                               const struct spi_mem_op *op);
> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> +                                 const struct spi_mem_op *op);
> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> +                                const struct spi_mem_op *op);
> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> +                                  const struct spi_mem_op *op);
>
>  void cadence_qspi_apb_chipselect(void *reg_base,
>         unsigned int chip_select, unsigned int decoder_enable);
> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
> index 8dd0495dfcf4..fd491f2c8104 100644
> --- a/drivers/spi/cadence_qspi_apb.c
> +++ b/drivers/spi/cadence_qspi_apb.c
> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
>         writel(reg, reg_base + CQSPI_REG_CONFIG);
>  }
>
> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
> +{
> +       unsigned int reg;
> +
> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
> +       reg |= CQSPI_REG_CONFIG_DIRECT;
> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
> +}
> +
>  /* Return 1 if idle, otherwise return 0 (busy). */
>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
>  {
> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
>  }
>
>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> -       const struct spi_mem_op *op)
> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> +                               const struct spi_mem_op *op)
>  {
>         unsigned int reg;
>         unsigned int rd_reg;
> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
>         return -ETIMEDOUT;
>  }
>
> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> -       unsigned int n_rx, u8 *rxbuf)
> +static int
> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> +                                      unsigned int n_rx, u8 *rxbuf)
>  {
>         unsigned int remaining = n_rx;
>         unsigned int bytes_to_read = 0;
> @@ -639,9 +649,26 @@ failrd:
>         return ret;
>  }
>
> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> +                                 const struct spi_mem_op *op)
> +{
> +       u32 from = op->addr.val;
> +       void *buf = op->data.buf.in;
> +       size_t len = op->data.nbytes;
> +
> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
> +               memcpy_fromio(buf, plat->ahbbase + from, len);
> +               if (!cadence_qspi_wait_idle(plat->regbase))
> +                       return -EIO;
> +               return 0;
> +       }
> +
> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
> +}
> +
>  /* Opcode + Address (3/4 bytes) */
> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> -       const struct spi_mem_op *op)
> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> +                                const struct spi_mem_op *op)
>  {
>         unsigned int reg;
>
> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>         return 0;
>  }
>
> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> -       unsigned int n_tx, const u8 *txbuf)
> +static int
> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> +                                       unsigned int n_tx, const u8 *txbuf)
>  {
>         unsigned int page_size = plat->page_size;
>         unsigned int remaining = n_tx;
> @@ -735,6 +763,23 @@ failwr:
>         return ret;
>  }
>
> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> +                                  const struct spi_mem_op *op)
> +{
> +       u32 to = op->addr.val;
> +       const void *buf = op->data.buf.out;
> +       size_t len = op->data.nbytes;
> +
> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
> +               memcpy_toio(plat->ahbbase + to, buf, len);
> +               if (!cadence_qspi_wait_idle(plat->regbase))
> +                       return -EIO;
> +               return 0;
> +       }
> +
> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
> +}
> +
>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
>  {
>         unsigned int reg;
> --
> 2.23.0
>
Vignesh Raghavendra Oct. 17, 2019, 12:44 p.m. UTC | #2
Hi,

On 17/10/19 5:09 PM, Simon Goldschmidt wrote:
> On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>>
>> Add support for Direct Access Controller mode of Cadence QSPI. This
>> allows MMIO access to SPI NOR flash providing better read performance.
>>
>> Signed-off-by: Vignesh R <vigneshr@ti.com>
>> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> 
> I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
> 50MHz) and it seems to work fine.
> 
> However, I had the impression it was a bit slower, not faster, although I
> haven't measured, and running at 50MHz with 4 data lines, reading the whole
> flash takes about 1.5 seconds only, so without actually measureing it, it's
> hard to tell if this performance is really worth the 440 bytes of extra code
> it adds?
> 

I should have noted in the commit msg that direct mode is used only when
AHB window is at least 8MB. Working with smaller window sizes would mean
code would fall back to indirect mode most of the time.
I see that socfgpa don't seem to have large AHB windows and therefore
would not use direct access mode.

On TI platforms its possible to use DMA to read data from memory mapped
region using mem to mem DMA channels which improves throughput by 5 times.

> Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
> do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
> for me :-)
> 

Hmm, I read this as patch 1 brings down memory consumption by 320 bytes
and patch 2 adds 440 bytes (delta being +120 bytes for entire series).
I can see if size can be optimized, but would like to avoid #ifdef'ery
within the driver if possible.

Regards
Vignesh

> Regards,
> Simon
> 
> 
>> ---
>>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
>>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
>>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
>>  3 files changed, 87 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
>> index 673a2e9a6c4c..6c98cbf39ae4 100644
>> --- a/drivers/spi/cadence_qspi.c
>> +++ b/drivers/spi/cadence_qspi.c
>> @@ -12,12 +12,13 @@
>>  #include <spi.h>
>>  #include <spi-mem.h>
>>  #include <linux/errno.h>
>> +#include <linux/sizes.h>
>>  #include "cadence_qspi.h"
>>
>>  #define CQSPI_STIG_READ                        0
>>  #define CQSPI_STIG_WRITE               1
>> -#define CQSPI_INDIRECT_READ            2
>> -#define CQSPI_INDIRECT_WRITE           3
>> +#define CQSPI_READ                     2
>> +#define CQSPI_WRITE                    3
>>
>>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
>>  {
>> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
>>
>>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>>  {
>> +       struct cadence_spi_platdata *plat = bus->platdata;
>>         struct cadence_spi_priv *priv = dev_get_priv(bus);
>>
>>         /* Disable QSPI */
>> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>>         /* Set SPI mode */
>>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
>>
>> +       /* Enable Direct Access Controller */
>> +       if (plat->use_dac_mode)
>> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
>> +
>>         /* Enable QSPI */
>>         cadence_qspi_apb_controller_enable(priv->regbase);
>>
>> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>>                 if (!op->addr.nbytes)
>>                         mode = CQSPI_STIG_READ;
>>                 else
>> -                       mode = CQSPI_INDIRECT_READ;
>> +                       mode = CQSPI_READ;
>>         } else {
>>                 if (!op->addr.nbytes || !op->data.buf.out)
>>                         mode = CQSPI_STIG_WRITE;
>>                 else
>> -                       mode = CQSPI_INDIRECT_WRITE;
>> +                       mode = CQSPI_WRITE;
>>         }
>>
>>         switch (mode) {
>> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>>         case CQSPI_STIG_WRITE:
>>                 err = cadence_qspi_apb_command_write(base, op);
>>                 break;
>> -       case CQSPI_INDIRECT_READ:
>> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
>> -               if (!err) {
>> -                       err = cadence_qspi_apb_indirect_read_execute
>> -                               (plat, op->data.nbytes, op->data.buf.in);
>> -               }
>> +       case CQSPI_READ:
>> +               err = cadence_qspi_apb_read_setup(plat, op);
>> +               if (!err)
>> +                       err = cadence_qspi_apb_read_execute(plat, op);
>>                 break;
>> -       case CQSPI_INDIRECT_WRITE:
>> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
>> -               if (!err) {
>> -                       err = cadence_qspi_apb_indirect_write_execute
>> -                       (plat, op->data.nbytes, op->data.buf.out);
>> -               }
>> +       case CQSPI_WRITE:
>> +               err = cadence_qspi_apb_write_setup(plat, op);
>> +               if (!err)
>> +                       err = cadence_qspi_apb_write_execute(plat, op);
>>                 break;
>>         default:
>>                 err = -1;
>> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
>>         ofnode subnode;
>>
>>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
>> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
>> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
>> +                       &plat->ahbsize);
>>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
>>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
>>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
>>         plat->trigger_address = dev_read_u32_default(bus,
>>                                                      "cdns,trigger-address",
>>                                                      0);
>> +       /* Use DAC mode only when MMIO window is at least 8M wide */
>> +       if (plat->ahbsize >= SZ_8M)
>> +               plat->use_dac_mode = true;
>>
>>         /* All other paramters are embedded in the child node */
>>         subnode = dev_read_first_subnode(bus);
>> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
>> index e655b027d788..619f0bed8efd 100644
>> --- a/drivers/spi/cadence_qspi.h
>> +++ b/drivers/spi/cadence_qspi.h
>> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
>>         u32             fifo_depth;
>>         u32             fifo_width;
>>         u32             trigger_address;
>> +       fdt_addr_t      ahbsize;
>> +       bool            use_dac_mode;
>>
>>         /* Flash parameters */
>>         u32             page_size;
>> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
>>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
>>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
>>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
>>
>>  int cadence_qspi_apb_command_read(void *reg_base_addr,
>>                                   const struct spi_mem_op *op);
>>  int cadence_qspi_apb_command_write(void *reg_base_addr,
>>                                    const struct spi_mem_op *op);
>>
>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
>> -       const struct spi_mem_op *op);
>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>> -       unsigned int rxlen, u8 *rxbuf);
>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>> -       const struct spi_mem_op *op);
>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>> -       unsigned int txlen, const u8 *txbuf);
>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
>> +                               const struct spi_mem_op *op);
>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
>> +                                 const struct spi_mem_op *op);
>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
>> +                                const struct spi_mem_op *op);
>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
>> +                                  const struct spi_mem_op *op);
>>
>>  void cadence_qspi_apb_chipselect(void *reg_base,
>>         unsigned int chip_select, unsigned int decoder_enable);
>> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
>> index 8dd0495dfcf4..fd491f2c8104 100644
>> --- a/drivers/spi/cadence_qspi_apb.c
>> +++ b/drivers/spi/cadence_qspi_apb.c
>> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
>>         writel(reg, reg_base + CQSPI_REG_CONFIG);
>>  }
>>
>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
>> +{
>> +       unsigned int reg;
>> +
>> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
>> +       reg |= CQSPI_REG_CONFIG_DIRECT;
>> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
>> +}
>> +
>>  /* Return 1 if idle, otherwise return 0 (busy). */
>>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
>>  {
>> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
>>  }
>>
>>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
>> -       const struct spi_mem_op *op)
>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
>> +                               const struct spi_mem_op *op)
>>  {
>>         unsigned int reg;
>>         unsigned int rd_reg;
>> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
>>         return -ETIMEDOUT;
>>  }
>>
>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>> -       unsigned int n_rx, u8 *rxbuf)
>> +static int
>> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>> +                                      unsigned int n_rx, u8 *rxbuf)
>>  {
>>         unsigned int remaining = n_rx;
>>         unsigned int bytes_to_read = 0;
>> @@ -639,9 +649,26 @@ failrd:
>>         return ret;
>>  }
>>
>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
>> +                                 const struct spi_mem_op *op)
>> +{
>> +       u32 from = op->addr.val;
>> +       void *buf = op->data.buf.in;
>> +       size_t len = op->data.nbytes;
>> +
>> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
>> +               memcpy_fromio(buf, plat->ahbbase + from, len);
>> +               if (!cadence_qspi_wait_idle(plat->regbase))
>> +                       return -EIO;
>> +               return 0;
>> +       }
>> +
>> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
>> +}
>> +
>>  /* Opcode + Address (3/4 bytes) */
>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>> -       const struct spi_mem_op *op)
>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
>> +                                const struct spi_mem_op *op)
>>  {
>>         unsigned int reg;
>>
>> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>>         return 0;
>>  }
>>
>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>> -       unsigned int n_tx, const u8 *txbuf)
>> +static int
>> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>> +                                       unsigned int n_tx, const u8 *txbuf)
>>  {
>>         unsigned int page_size = plat->page_size;
>>         unsigned int remaining = n_tx;
>> @@ -735,6 +763,23 @@ failwr:
>>         return ret;
>>  }
>>
>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
>> +                                  const struct spi_mem_op *op)
>> +{
>> +       u32 to = op->addr.val;
>> +       const void *buf = op->data.buf.out;
>> +       size_t len = op->data.nbytes;
>> +
>> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
>> +               memcpy_toio(plat->ahbbase + to, buf, len);
>> +               if (!cadence_qspi_wait_idle(plat->regbase))
>> +                       return -EIO;
>> +               return 0;
>> +       }
>> +
>> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
>> +}
>> +
>>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
>>  {
>>         unsigned int reg;
>> --
>> 2.23.0
>>
Simon Goldschmidt Oct. 17, 2019, 12:55 p.m. UTC | #3
On Thu, Oct 17, 2019 at 2:44 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>
> Hi,
>
> On 17/10/19 5:09 PM, Simon Goldschmidt wrote:
> > On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> >>
> >> Add support for Direct Access Controller mode of Cadence QSPI. This
> >> allows MMIO access to SPI NOR flash providing better read performance.
> >>
> >> Signed-off-by: Vignesh R <vigneshr@ti.com>
> >> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> >
> > I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
> > 50MHz) and it seems to work fine.
> >
> > However, I had the impression it was a bit slower, not faster, although I
> > haven't measured, and running at 50MHz with 4 data lines, reading the whole
> > flash takes about 1.5 seconds only, so without actually measureing it, it's
> > hard to tell if this performance is really worth the 440 bytes of extra code
> > it adds?
> >
>
> I should have noted in the commit msg that direct mode is used only when
> AHB window is at least 8MB. Working with smaller window sizes would mean
> code would fall back to indirect mode most of the time.
> I see that socfgpa don't seem to have large AHB windows and therefore
> would not use direct access mode.

Aha. Yes, socfpga gen5 has a 1MB window only, I think. So the speed hasn't
changed.

>
> On TI platforms its possible to use DMA to read data from memory mapped
> region using mem to mem DMA channels which improves throughput by 5 times.

Hmm, I'm using a QSPI at 50 MHz and transferring 31 MByte takes about 1.5
seconds. The maximum speed of that setup would be 25 MByte/s without any
overhead (1.24 seconds for 31 MB). There's no way I could achieve an improvement
by 5 on my platform!

>
> > Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
> > do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
> > for me :-)
> >
>
> Hmm, I read this as patch 1 brings down memory consumption by 320 bytes
> and patch 2 adds 440 bytes (delta being +120 bytes for entire series).
> I can see if size can be optimized, but would like to avoid #ifdef'ery
> within the driver if possible.

Well, I can only say I'm currently struggling to keep SPI-NOR and MMC enabled
at the same time in socfpga_socrates (moving more code to DM). And even if it
sounds like not much, 440 bytes *are* much for me at this point.

I still would prefer having a Kconfig option for this that can be disabled
for socfpga.

Regards,
Simon

>
> Regards
> Vignesh
>
> > Regards,
> > Simon
> >
> >
> >> ---
> >>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
> >>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
> >>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
> >>  3 files changed, 87 insertions(+), 33 deletions(-)
> >>
> >> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
> >> index 673a2e9a6c4c..6c98cbf39ae4 100644
> >> --- a/drivers/spi/cadence_qspi.c
> >> +++ b/drivers/spi/cadence_qspi.c
> >> @@ -12,12 +12,13 @@
> >>  #include <spi.h>
> >>  #include <spi-mem.h>
> >>  #include <linux/errno.h>
> >> +#include <linux/sizes.h>
> >>  #include "cadence_qspi.h"
> >>
> >>  #define CQSPI_STIG_READ                        0
> >>  #define CQSPI_STIG_WRITE               1
> >> -#define CQSPI_INDIRECT_READ            2
> >> -#define CQSPI_INDIRECT_WRITE           3
> >> +#define CQSPI_READ                     2
> >> +#define CQSPI_WRITE                    3
> >>
> >>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
> >>  {
> >> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
> >>
> >>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> >>  {
> >> +       struct cadence_spi_platdata *plat = bus->platdata;
> >>         struct cadence_spi_priv *priv = dev_get_priv(bus);
> >>
> >>         /* Disable QSPI */
> >> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> >>         /* Set SPI mode */
> >>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
> >>
> >> +       /* Enable Direct Access Controller */
> >> +       if (plat->use_dac_mode)
> >> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
> >> +
> >>         /* Enable QSPI */
> >>         cadence_qspi_apb_controller_enable(priv->regbase);
> >>
> >> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> >>                 if (!op->addr.nbytes)
> >>                         mode = CQSPI_STIG_READ;
> >>                 else
> >> -                       mode = CQSPI_INDIRECT_READ;
> >> +                       mode = CQSPI_READ;
> >>         } else {
> >>                 if (!op->addr.nbytes || !op->data.buf.out)
> >>                         mode = CQSPI_STIG_WRITE;
> >>                 else
> >> -                       mode = CQSPI_INDIRECT_WRITE;
> >> +                       mode = CQSPI_WRITE;
> >>         }
> >>
> >>         switch (mode) {
> >> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> >>         case CQSPI_STIG_WRITE:
> >>                 err = cadence_qspi_apb_command_write(base, op);
> >>                 break;
> >> -       case CQSPI_INDIRECT_READ:
> >> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
> >> -               if (!err) {
> >> -                       err = cadence_qspi_apb_indirect_read_execute
> >> -                               (plat, op->data.nbytes, op->data.buf.in);
> >> -               }
> >> +       case CQSPI_READ:
> >> +               err = cadence_qspi_apb_read_setup(plat, op);
> >> +               if (!err)
> >> +                       err = cadence_qspi_apb_read_execute(plat, op);
> >>                 break;
> >> -       case CQSPI_INDIRECT_WRITE:
> >> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
> >> -               if (!err) {
> >> -                       err = cadence_qspi_apb_indirect_write_execute
> >> -                       (plat, op->data.nbytes, op->data.buf.out);
> >> -               }
> >> +       case CQSPI_WRITE:
> >> +               err = cadence_qspi_apb_write_setup(plat, op);
> >> +               if (!err)
> >> +                       err = cadence_qspi_apb_write_execute(plat, op);
> >>                 break;
> >>         default:
> >>                 err = -1;
> >> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
> >>         ofnode subnode;
> >>
> >>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
> >> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
> >> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
> >> +                       &plat->ahbsize);
> >>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
> >>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
> >>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
> >>         plat->trigger_address = dev_read_u32_default(bus,
> >>                                                      "cdns,trigger-address",
> >>                                                      0);
> >> +       /* Use DAC mode only when MMIO window is at least 8M wide */
> >> +       if (plat->ahbsize >= SZ_8M)
> >> +               plat->use_dac_mode = true;
> >>
> >>         /* All other paramters are embedded in the child node */
> >>         subnode = dev_read_first_subnode(bus);
> >> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
> >> index e655b027d788..619f0bed8efd 100644
> >> --- a/drivers/spi/cadence_qspi.h
> >> +++ b/drivers/spi/cadence_qspi.h
> >> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
> >>         u32             fifo_depth;
> >>         u32             fifo_width;
> >>         u32             trigger_address;
> >> +       fdt_addr_t      ahbsize;
> >> +       bool            use_dac_mode;
> >>
> >>         /* Flash parameters */
> >>         u32             page_size;
> >> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
> >>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
> >>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
> >>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
> >> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
> >>
> >>  int cadence_qspi_apb_command_read(void *reg_base_addr,
> >>                                   const struct spi_mem_op *op);
> >>  int cadence_qspi_apb_command_write(void *reg_base_addr,
> >>                                    const struct spi_mem_op *op);
> >>
> >> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> >> -       const struct spi_mem_op *op);
> >> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >> -       unsigned int rxlen, u8 *rxbuf);
> >> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >> -       const struct spi_mem_op *op);
> >> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >> -       unsigned int txlen, const u8 *txbuf);
> >> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> >> +                               const struct spi_mem_op *op);
> >> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> >> +                                 const struct spi_mem_op *op);
> >> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> >> +                                const struct spi_mem_op *op);
> >> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> >> +                                  const struct spi_mem_op *op);
> >>
> >>  void cadence_qspi_apb_chipselect(void *reg_base,
> >>         unsigned int chip_select, unsigned int decoder_enable);
> >> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
> >> index 8dd0495dfcf4..fd491f2c8104 100644
> >> --- a/drivers/spi/cadence_qspi_apb.c
> >> +++ b/drivers/spi/cadence_qspi_apb.c
> >> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
> >>         writel(reg, reg_base + CQSPI_REG_CONFIG);
> >>  }
> >>
> >> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
> >> +{
> >> +       unsigned int reg;
> >> +
> >> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
> >> +       reg |= CQSPI_REG_CONFIG_DIRECT;
> >> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
> >> +}
> >> +
> >>  /* Return 1 if idle, otherwise return 0 (busy). */
> >>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
> >>  {
> >> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
> >>  }
> >>
> >>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
> >> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> >> -       const struct spi_mem_op *op)
> >> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> >> +                               const struct spi_mem_op *op)
> >>  {
> >>         unsigned int reg;
> >>         unsigned int rd_reg;
> >> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
> >>         return -ETIMEDOUT;
> >>  }
> >>
> >> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >> -       unsigned int n_rx, u8 *rxbuf)
> >> +static int
> >> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >> +                                      unsigned int n_rx, u8 *rxbuf)
> >>  {
> >>         unsigned int remaining = n_rx;
> >>         unsigned int bytes_to_read = 0;
> >> @@ -639,9 +649,26 @@ failrd:
> >>         return ret;
> >>  }
> >>
> >> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> >> +                                 const struct spi_mem_op *op)
> >> +{
> >> +       u32 from = op->addr.val;
> >> +       void *buf = op->data.buf.in;
> >> +       size_t len = op->data.nbytes;
> >> +
> >> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
> >> +               memcpy_fromio(buf, plat->ahbbase + from, len);
> >> +               if (!cadence_qspi_wait_idle(plat->regbase))
> >> +                       return -EIO;
> >> +               return 0;
> >> +       }
> >> +
> >> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
> >> +}
> >> +
> >>  /* Opcode + Address (3/4 bytes) */
> >> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >> -       const struct spi_mem_op *op)
> >> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> >> +                                const struct spi_mem_op *op)
> >>  {
> >>         unsigned int reg;
> >>
> >> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >>         return 0;
> >>  }
> >>
> >> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >> -       unsigned int n_tx, const u8 *txbuf)
> >> +static int
> >> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >> +                                       unsigned int n_tx, const u8 *txbuf)
> >>  {
> >>         unsigned int page_size = plat->page_size;
> >>         unsigned int remaining = n_tx;
> >> @@ -735,6 +763,23 @@ failwr:
> >>         return ret;
> >>  }
> >>
> >> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> >> +                                  const struct spi_mem_op *op)
> >> +{
> >> +       u32 to = op->addr.val;
> >> +       const void *buf = op->data.buf.out;
> >> +       size_t len = op->data.nbytes;
> >> +
> >> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
> >> +               memcpy_toio(plat->ahbbase + to, buf, len);
> >> +               if (!cadence_qspi_wait_idle(plat->regbase))
> >> +                       return -EIO;
> >> +               return 0;
> >> +       }
> >> +
> >> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
> >> +}
> >> +
> >>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
> >>  {
> >>         unsigned int reg;
> >> --
> >> 2.23.0
> >>
>
> --
> Regards
> Vignesh
Simon Goldschmidt Oct. 18, 2019, 9:04 a.m. UTC | #4
On Thu, Oct 17, 2019 at 2:55 PM Simon Goldschmidt
<simon.k.r.goldschmidt@gmail.com> wrote:
>
> On Thu, Oct 17, 2019 at 2:44 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> >
> > Hi,
> >
> > On 17/10/19 5:09 PM, Simon Goldschmidt wrote:
> > > On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> > >>
> > >> Add support for Direct Access Controller mode of Cadence QSPI. This
> > >> allows MMIO access to SPI NOR flash providing better read performance.
> > >>
> > >> Signed-off-by: Vignesh R <vigneshr@ti.com>
> > >> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> > >
> > > I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
> > > 50MHz) and it seems to work fine.
> > >
> > > However, I had the impression it was a bit slower, not faster, although I
> > > haven't measured, and running at 50MHz with 4 data lines, reading the whole
> > > flash takes about 1.5 seconds only, so without actually measureing it, it's
> > > hard to tell if this performance is really worth the 440 bytes of extra code
> > > it adds?
> > >
> >
> > I should have noted in the commit msg that direct mode is used only when
> > AHB window is at least 8MB. Working with smaller window sizes would mean
> > code would fall back to indirect mode most of the time.
> > I see that socfgpa don't seem to have large AHB windows and therefore
> > would not use direct access mode.
>
> Aha. Yes, socfpga gen5 has a 1MB window only, I think. So the speed hasn't
> changed.
>
> >
> > On TI platforms its possible to use DMA to read data from memory mapped
> > region using mem to mem DMA channels which improves throughput by 5 times.
>
> Hmm, I'm using a QSPI at 50 MHz and transferring 31 MByte takes about 1.5
> seconds. The maximum speed of that setup would be 25 MByte/s without any
> overhead (1.24 seconds for 31 MB). There's no way I could achieve an improvement
> by 5 on my platform!
>
> >
> > > Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
> > > do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
> > > for me :-)
> > >
> >
> > Hmm, I read this as patch 1 brings down memory consumption by 320 bytes
> > and patch 2 adds 440 bytes (delta being +120 bytes for entire series).
> > I can see if size can be optimized, but would like to avoid #ifdef'ery
> > within the driver if possible.
>
> Well, I can only say I'm currently struggling to keep SPI-NOR and MMC enabled
> at the same time in socfpga_socrates (moving more code to DM). And even if it
> sounds like not much, 440 bytes *are* much for me at this point.
>
> I still would prefer having a Kconfig option for this that can be disabled
> for socfpga.

Oh well, maybe just go ahead adding this and I'll try how it fits. We can alway
add reduced functionality later by checking for CONFIG_SPL_SPI_FLASH_TINY,
whicht might be a better config option to guard this than something cadence-qspi
specific.

Regards,
Simon

>
> Regards,
> Simon
>
> >
> > Regards
> > Vignesh
> >
> > > Regards,
> > > Simon
> > >
> > >
> > >> ---
> > >>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
> > >>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
> > >>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
> > >>  3 files changed, 87 insertions(+), 33 deletions(-)
> > >>
> > >> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
> > >> index 673a2e9a6c4c..6c98cbf39ae4 100644
> > >> --- a/drivers/spi/cadence_qspi.c
> > >> +++ b/drivers/spi/cadence_qspi.c
> > >> @@ -12,12 +12,13 @@
> > >>  #include <spi.h>
> > >>  #include <spi-mem.h>
> > >>  #include <linux/errno.h>
> > >> +#include <linux/sizes.h>
> > >>  #include "cadence_qspi.h"
> > >>
> > >>  #define CQSPI_STIG_READ                        0
> > >>  #define CQSPI_STIG_WRITE               1
> > >> -#define CQSPI_INDIRECT_READ            2
> > >> -#define CQSPI_INDIRECT_WRITE           3
> > >> +#define CQSPI_READ                     2
> > >> +#define CQSPI_WRITE                    3
> > >>
> > >>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
> > >>  {
> > >> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
> > >>
> > >>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> > >>  {
> > >> +       struct cadence_spi_platdata *plat = bus->platdata;
> > >>         struct cadence_spi_priv *priv = dev_get_priv(bus);
> > >>
> > >>         /* Disable QSPI */
> > >> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> > >>         /* Set SPI mode */
> > >>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
> > >>
> > >> +       /* Enable Direct Access Controller */
> > >> +       if (plat->use_dac_mode)
> > >> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
> > >> +
> > >>         /* Enable QSPI */
> > >>         cadence_qspi_apb_controller_enable(priv->regbase);
> > >>
> > >> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> > >>                 if (!op->addr.nbytes)
> > >>                         mode = CQSPI_STIG_READ;
> > >>                 else
> > >> -                       mode = CQSPI_INDIRECT_READ;
> > >> +                       mode = CQSPI_READ;
> > >>         } else {
> > >>                 if (!op->addr.nbytes || !op->data.buf.out)
> > >>                         mode = CQSPI_STIG_WRITE;
> > >>                 else
> > >> -                       mode = CQSPI_INDIRECT_WRITE;
> > >> +                       mode = CQSPI_WRITE;
> > >>         }
> > >>
> > >>         switch (mode) {
> > >> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> > >>         case CQSPI_STIG_WRITE:
> > >>                 err = cadence_qspi_apb_command_write(base, op);
> > >>                 break;
> > >> -       case CQSPI_INDIRECT_READ:
> > >> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
> > >> -               if (!err) {
> > >> -                       err = cadence_qspi_apb_indirect_read_execute
> > >> -                               (plat, op->data.nbytes, op->data.buf.in);
> > >> -               }
> > >> +       case CQSPI_READ:
> > >> +               err = cadence_qspi_apb_read_setup(plat, op);
> > >> +               if (!err)
> > >> +                       err = cadence_qspi_apb_read_execute(plat, op);
> > >>                 break;
> > >> -       case CQSPI_INDIRECT_WRITE:
> > >> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
> > >> -               if (!err) {
> > >> -                       err = cadence_qspi_apb_indirect_write_execute
> > >> -                       (plat, op->data.nbytes, op->data.buf.out);
> > >> -               }
> > >> +       case CQSPI_WRITE:
> > >> +               err = cadence_qspi_apb_write_setup(plat, op);
> > >> +               if (!err)
> > >> +                       err = cadence_qspi_apb_write_execute(plat, op);
> > >>                 break;
> > >>         default:
> > >>                 err = -1;
> > >> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
> > >>         ofnode subnode;
> > >>
> > >>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
> > >> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
> > >> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
> > >> +                       &plat->ahbsize);
> > >>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
> > >>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
> > >>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
> > >>         plat->trigger_address = dev_read_u32_default(bus,
> > >>                                                      "cdns,trigger-address",
> > >>                                                      0);
> > >> +       /* Use DAC mode only when MMIO window is at least 8M wide */
> > >> +       if (plat->ahbsize >= SZ_8M)
> > >> +               plat->use_dac_mode = true;
> > >>
> > >>         /* All other paramters are embedded in the child node */
> > >>         subnode = dev_read_first_subnode(bus);
> > >> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
> > >> index e655b027d788..619f0bed8efd 100644
> > >> --- a/drivers/spi/cadence_qspi.h
> > >> +++ b/drivers/spi/cadence_qspi.h
> > >> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
> > >>         u32             fifo_depth;
> > >>         u32             fifo_width;
> > >>         u32             trigger_address;
> > >> +       fdt_addr_t      ahbsize;
> > >> +       bool            use_dac_mode;
> > >>
> > >>         /* Flash parameters */
> > >>         u32             page_size;
> > >> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
> > >>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
> > >>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
> > >>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
> > >> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
> > >>
> > >>  int cadence_qspi_apb_command_read(void *reg_base_addr,
> > >>                                   const struct spi_mem_op *op);
> > >>  int cadence_qspi_apb_command_write(void *reg_base_addr,
> > >>                                    const struct spi_mem_op *op);
> > >>
> > >> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> > >> -       const struct spi_mem_op *op);
> > >> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> > >> -       unsigned int rxlen, u8 *rxbuf);
> > >> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> > >> -       const struct spi_mem_op *op);
> > >> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> > >> -       unsigned int txlen, const u8 *txbuf);
> > >> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> > >> +                               const struct spi_mem_op *op);
> > >> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> > >> +                                 const struct spi_mem_op *op);
> > >> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> > >> +                                const struct spi_mem_op *op);
> > >> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> > >> +                                  const struct spi_mem_op *op);
> > >>
> > >>  void cadence_qspi_apb_chipselect(void *reg_base,
> > >>         unsigned int chip_select, unsigned int decoder_enable);
> > >> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
> > >> index 8dd0495dfcf4..fd491f2c8104 100644
> > >> --- a/drivers/spi/cadence_qspi_apb.c
> > >> +++ b/drivers/spi/cadence_qspi_apb.c
> > >> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
> > >>         writel(reg, reg_base + CQSPI_REG_CONFIG);
> > >>  }
> > >>
> > >> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
> > >> +{
> > >> +       unsigned int reg;
> > >> +
> > >> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
> > >> +       reg |= CQSPI_REG_CONFIG_DIRECT;
> > >> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
> > >> +}
> > >> +
> > >>  /* Return 1 if idle, otherwise return 0 (busy). */
> > >>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
> > >>  {
> > >> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
> > >>  }
> > >>
> > >>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
> > >> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> > >> -       const struct spi_mem_op *op)
> > >> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> > >> +                               const struct spi_mem_op *op)
> > >>  {
> > >>         unsigned int reg;
> > >>         unsigned int rd_reg;
> > >> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
> > >>         return -ETIMEDOUT;
> > >>  }
> > >>
> > >> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> > >> -       unsigned int n_rx, u8 *rxbuf)
> > >> +static int
> > >> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> > >> +                                      unsigned int n_rx, u8 *rxbuf)
> > >>  {
> > >>         unsigned int remaining = n_rx;
> > >>         unsigned int bytes_to_read = 0;
> > >> @@ -639,9 +649,26 @@ failrd:
> > >>         return ret;
> > >>  }
> > >>
> > >> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> > >> +                                 const struct spi_mem_op *op)
> > >> +{
> > >> +       u32 from = op->addr.val;
> > >> +       void *buf = op->data.buf.in;
> > >> +       size_t len = op->data.nbytes;
> > >> +
> > >> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
> > >> +               memcpy_fromio(buf, plat->ahbbase + from, len);
> > >> +               if (!cadence_qspi_wait_idle(plat->regbase))
> > >> +                       return -EIO;
> > >> +               return 0;
> > >> +       }
> > >> +
> > >> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
> > >> +}
> > >> +
> > >>  /* Opcode + Address (3/4 bytes) */
> > >> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> > >> -       const struct spi_mem_op *op)
> > >> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> > >> +                                const struct spi_mem_op *op)
> > >>  {
> > >>         unsigned int reg;
> > >>
> > >> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> > >>         return 0;
> > >>  }
> > >>
> > >> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> > >> -       unsigned int n_tx, const u8 *txbuf)
> > >> +static int
> > >> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> > >> +                                       unsigned int n_tx, const u8 *txbuf)
> > >>  {
> > >>         unsigned int page_size = plat->page_size;
> > >>         unsigned int remaining = n_tx;
> > >> @@ -735,6 +763,23 @@ failwr:
> > >>         return ret;
> > >>  }
> > >>
> > >> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> > >> +                                  const struct spi_mem_op *op)
> > >> +{
> > >> +       u32 to = op->addr.val;
> > >> +       const void *buf = op->data.buf.out;
> > >> +       size_t len = op->data.nbytes;
> > >> +
> > >> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
> > >> +               memcpy_toio(plat->ahbbase + to, buf, len);
> > >> +               if (!cadence_qspi_wait_idle(plat->regbase))
> > >> +                       return -EIO;
> > >> +               return 0;
> > >> +       }
> > >> +
> > >> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
> > >> +}
> > >> +
> > >>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
> > >>  {
> > >>         unsigned int reg;
> > >> --
> > >> 2.23.0
> > >>
> >
> > --
> > Regards
> > Vignesh
Vignesh Raghavendra Oct. 18, 2019, 12:40 p.m. UTC | #5
Hi,

On 18/10/19 2:34 PM, Simon Goldschmidt wrote:
> On Thu, Oct 17, 2019 at 2:55 PM Simon Goldschmidt
> <simon.k.r.goldschmidt@gmail.com> wrote:
>>
>> On Thu, Oct 17, 2019 at 2:44 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>>>
>>> Hi,
>>>
>>> On 17/10/19 5:09 PM, Simon Goldschmidt wrote:
>>>> On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>>>>>
>>>>> Add support for Direct Access Controller mode of Cadence QSPI. This
>>>>> allows MMIO access to SPI NOR flash providing better read performance.
>>>>>
>>>>> Signed-off-by: Vignesh R <vigneshr@ti.com>
>>>>> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
>>>>
>>>> I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
>>>> 50MHz) and it seems to work fine.
>>>>
>>>> However, I had the impression it was a bit slower, not faster, although I
>>>> haven't measured, and running at 50MHz with 4 data lines, reading the whole
>>>> flash takes about 1.5 seconds only, so without actually measureing it, it's
>>>> hard to tell if this performance is really worth the 440 bytes of extra code
>>>> it adds?
>>>>
>>>
>>> I should have noted in the commit msg that direct mode is used only when
>>> AHB window is at least 8MB. Working with smaller window sizes would mean
>>> code would fall back to indirect mode most of the time.
>>> I see that socfgpa don't seem to have large AHB windows and therefore
>>> would not use direct access mode.
>>
>> Aha. Yes, socfpga gen5 has a 1MB window only, I think. So the speed hasn't
>> changed.
>>
>>>
>>> On TI platforms its possible to use DMA to read data from memory mapped
>>> region using mem to mem DMA channels which improves throughput by 5 times.
>>
>> Hmm, I'm using a QSPI at 50 MHz and transferring 31 MByte takes about 1.5
>> seconds. The maximum speed of that setup would be 25 MByte/s without any
>> overhead (1.24 seconds for 31 MB). There's no way I could achieve an improvement
>> by 5 on my platform!
>>
>>>
>>>> Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
>>>> do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
>>>> for me :-)
>>>>
>>>
>>> Hmm, I read this as patch 1 brings down memory consumption by 320 bytes
>>> and patch 2 adds 440 bytes (delta being +120 bytes for entire series).
>>> I can see if size can be optimized, but would like to avoid #ifdef'ery
>>> within the driver if possible.
>>
>> Well, I can only say I'm currently struggling to keep SPI-NOR and MMC enabled
>> at the same time in socfpga_socrates (moving more code to DM). And even if it
>> sounds like not much, 440 bytes *are* much for me at this point.
>>
>> I still would prefer having a Kconfig option for this that can be disabled
>> for socfpga.
> 
> Oh well, maybe just go ahead adding this and I'll try how it fits. We can alway
> add reduced functionality later by checking for CONFIG_SPL_SPI_FLASH_TINY,
> whicht might be a better config option to guard this than something cadence-qspi
> specific.
> 

Okay, thanks! Guarding with CONFIG_SPL_SPI_FLASH_TINY as and where
required seems fair idea. BTW, it would be great if you could confirm
Quad read works fine when spi-tx-bus-width = 1 and spi-rx-bus-width = 4
as I suggested in other thread?

Regards
Vignesh

> Regards,
> Simon
> 
>>
>> Regards,
>> Simon
>>
>>>
>>> Regards
>>> Vignesh
>>>
>>>> Regards,
>>>> Simon
>>>>
>>>>
>>>>> ---
>>>>>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
>>>>>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
>>>>>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
>>>>>  3 files changed, 87 insertions(+), 33 deletions(-)
>>>>>
>>>>> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
>>>>> index 673a2e9a6c4c..6c98cbf39ae4 100644
>>>>> --- a/drivers/spi/cadence_qspi.c
>>>>> +++ b/drivers/spi/cadence_qspi.c
>>>>> @@ -12,12 +12,13 @@
>>>>>  #include <spi.h>
>>>>>  #include <spi-mem.h>
>>>>>  #include <linux/errno.h>
>>>>> +#include <linux/sizes.h>
>>>>>  #include "cadence_qspi.h"
>>>>>
>>>>>  #define CQSPI_STIG_READ                        0
>>>>>  #define CQSPI_STIG_WRITE               1
>>>>> -#define CQSPI_INDIRECT_READ            2
>>>>> -#define CQSPI_INDIRECT_WRITE           3
>>>>> +#define CQSPI_READ                     2
>>>>> +#define CQSPI_WRITE                    3
>>>>>
>>>>>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
>>>>>  {
>>>>> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
>>>>>
>>>>>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>>>>>  {
>>>>> +       struct cadence_spi_platdata *plat = bus->platdata;
>>>>>         struct cadence_spi_priv *priv = dev_get_priv(bus);
>>>>>
>>>>>         /* Disable QSPI */
>>>>> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>>>>>         /* Set SPI mode */
>>>>>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
>>>>>
>>>>> +       /* Enable Direct Access Controller */
>>>>> +       if (plat->use_dac_mode)
>>>>> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
>>>>> +
>>>>>         /* Enable QSPI */
>>>>>         cadence_qspi_apb_controller_enable(priv->regbase);
>>>>>
>>>>> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>>>>>                 if (!op->addr.nbytes)
>>>>>                         mode = CQSPI_STIG_READ;
>>>>>                 else
>>>>> -                       mode = CQSPI_INDIRECT_READ;
>>>>> +                       mode = CQSPI_READ;
>>>>>         } else {
>>>>>                 if (!op->addr.nbytes || !op->data.buf.out)
>>>>>                         mode = CQSPI_STIG_WRITE;
>>>>>                 else
>>>>> -                       mode = CQSPI_INDIRECT_WRITE;
>>>>> +                       mode = CQSPI_WRITE;
>>>>>         }
>>>>>
>>>>>         switch (mode) {
>>>>> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>>>>>         case CQSPI_STIG_WRITE:
>>>>>                 err = cadence_qspi_apb_command_write(base, op);
>>>>>                 break;
>>>>> -       case CQSPI_INDIRECT_READ:
>>>>> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
>>>>> -               if (!err) {
>>>>> -                       err = cadence_qspi_apb_indirect_read_execute
>>>>> -                               (plat, op->data.nbytes, op->data.buf.in);
>>>>> -               }
>>>>> +       case CQSPI_READ:
>>>>> +               err = cadence_qspi_apb_read_setup(plat, op);
>>>>> +               if (!err)
>>>>> +                       err = cadence_qspi_apb_read_execute(plat, op);
>>>>>                 break;
>>>>> -       case CQSPI_INDIRECT_WRITE:
>>>>> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
>>>>> -               if (!err) {
>>>>> -                       err = cadence_qspi_apb_indirect_write_execute
>>>>> -                       (plat, op->data.nbytes, op->data.buf.out);
>>>>> -               }
>>>>> +       case CQSPI_WRITE:
>>>>> +               err = cadence_qspi_apb_write_setup(plat, op);
>>>>> +               if (!err)
>>>>> +                       err = cadence_qspi_apb_write_execute(plat, op);
>>>>>                 break;
>>>>>         default:
>>>>>                 err = -1;
>>>>> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
>>>>>         ofnode subnode;
>>>>>
>>>>>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
>>>>> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
>>>>> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
>>>>> +                       &plat->ahbsize);
>>>>>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
>>>>>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
>>>>>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
>>>>>         plat->trigger_address = dev_read_u32_default(bus,
>>>>>                                                      "cdns,trigger-address",
>>>>>                                                      0);
>>>>> +       /* Use DAC mode only when MMIO window is at least 8M wide */
>>>>> +       if (plat->ahbsize >= SZ_8M)
>>>>> +               plat->use_dac_mode = true;
>>>>>
>>>>>         /* All other paramters are embedded in the child node */
>>>>>         subnode = dev_read_first_subnode(bus);
>>>>> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
>>>>> index e655b027d788..619f0bed8efd 100644
>>>>> --- a/drivers/spi/cadence_qspi.h
>>>>> +++ b/drivers/spi/cadence_qspi.h
>>>>> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
>>>>>         u32             fifo_depth;
>>>>>         u32             fifo_width;
>>>>>         u32             trigger_address;
>>>>> +       fdt_addr_t      ahbsize;
>>>>> +       bool            use_dac_mode;
>>>>>
>>>>>         /* Flash parameters */
>>>>>         u32             page_size;
>>>>> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
>>>>>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
>>>>>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
>>>>>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
>>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
>>>>>
>>>>>  int cadence_qspi_apb_command_read(void *reg_base_addr,
>>>>>                                   const struct spi_mem_op *op);
>>>>>  int cadence_qspi_apb_command_write(void *reg_base_addr,
>>>>>                                    const struct spi_mem_op *op);
>>>>>
>>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
>>>>> -       const struct spi_mem_op *op);
>>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>>>>> -       unsigned int rxlen, u8 *rxbuf);
>>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>>>>> -       const struct spi_mem_op *op);
>>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>>>>> -       unsigned int txlen, const u8 *txbuf);
>>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
>>>>> +                               const struct spi_mem_op *op);
>>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
>>>>> +                                 const struct spi_mem_op *op);
>>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
>>>>> +                                const struct spi_mem_op *op);
>>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
>>>>> +                                  const struct spi_mem_op *op);
>>>>>
>>>>>  void cadence_qspi_apb_chipselect(void *reg_base,
>>>>>         unsigned int chip_select, unsigned int decoder_enable);
>>>>> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
>>>>> index 8dd0495dfcf4..fd491f2c8104 100644
>>>>> --- a/drivers/spi/cadence_qspi_apb.c
>>>>> +++ b/drivers/spi/cadence_qspi_apb.c
>>>>> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
>>>>>         writel(reg, reg_base + CQSPI_REG_CONFIG);
>>>>>  }
>>>>>
>>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
>>>>> +{
>>>>> +       unsigned int reg;
>>>>> +
>>>>> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
>>>>> +       reg |= CQSPI_REG_CONFIG_DIRECT;
>>>>> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
>>>>> +}
>>>>> +
>>>>>  /* Return 1 if idle, otherwise return 0 (busy). */
>>>>>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
>>>>>  {
>>>>> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
>>>>>  }
>>>>>
>>>>>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
>>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
>>>>> -       const struct spi_mem_op *op)
>>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
>>>>> +                               const struct spi_mem_op *op)
>>>>>  {
>>>>>         unsigned int reg;
>>>>>         unsigned int rd_reg;
>>>>> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
>>>>>         return -ETIMEDOUT;
>>>>>  }
>>>>>
>>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>>>>> -       unsigned int n_rx, u8 *rxbuf)
>>>>> +static int
>>>>> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>>>>> +                                      unsigned int n_rx, u8 *rxbuf)
>>>>>  {
>>>>>         unsigned int remaining = n_rx;
>>>>>         unsigned int bytes_to_read = 0;
>>>>> @@ -639,9 +649,26 @@ failrd:
>>>>>         return ret;
>>>>>  }
>>>>>
>>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
>>>>> +                                 const struct spi_mem_op *op)
>>>>> +{
>>>>> +       u32 from = op->addr.val;
>>>>> +       void *buf = op->data.buf.in;
>>>>> +       size_t len = op->data.nbytes;
>>>>> +
>>>>> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
>>>>> +               memcpy_fromio(buf, plat->ahbbase + from, len);
>>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
>>>>> +                       return -EIO;
>>>>> +               return 0;
>>>>> +       }
>>>>> +
>>>>> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
>>>>> +}
>>>>> +
>>>>>  /* Opcode + Address (3/4 bytes) */
>>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>>>>> -       const struct spi_mem_op *op)
>>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
>>>>> +                                const struct spi_mem_op *op)
>>>>>  {
>>>>>         unsigned int reg;
>>>>>
>>>>> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>>>>>         return 0;
>>>>>  }
>>>>>
>>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>>>>> -       unsigned int n_tx, const u8 *txbuf)
>>>>> +static int
>>>>> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>>>>> +                                       unsigned int n_tx, const u8 *txbuf)
>>>>>  {
>>>>>         unsigned int page_size = plat->page_size;
>>>>>         unsigned int remaining = n_tx;
>>>>> @@ -735,6 +763,23 @@ failwr:
>>>>>         return ret;
>>>>>  }
>>>>>
>>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
>>>>> +                                  const struct spi_mem_op *op)
>>>>> +{
>>>>> +       u32 to = op->addr.val;
>>>>> +       const void *buf = op->data.buf.out;
>>>>> +       size_t len = op->data.nbytes;
>>>>> +
>>>>> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
>>>>> +               memcpy_toio(plat->ahbbase + to, buf, len);
>>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
>>>>> +                       return -EIO;
>>>>> +               return 0;
>>>>> +       }
>>>>> +
>>>>> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
>>>>> +}
>>>>> +
>>>>>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
>>>>>  {
>>>>>         unsigned int reg;
>>>>> --
>>>>> 2.23.0
>>>>>
>>>
>>> --
>>> Regards
>>> Vignesh
Simon Goldschmidt Oct. 18, 2019, 12:42 p.m. UTC | #6
On Fri, Oct 18, 2019 at 2:40 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>
> Hi,
>
> On 18/10/19 2:34 PM, Simon Goldschmidt wrote:
> > On Thu, Oct 17, 2019 at 2:55 PM Simon Goldschmidt
> > <simon.k.r.goldschmidt@gmail.com> wrote:
> >>
> >> On Thu, Oct 17, 2019 at 2:44 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> >>>
> >>> Hi,
> >>>
> >>> On 17/10/19 5:09 PM, Simon Goldschmidt wrote:
> >>>> On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> >>>>>
> >>>>> Add support for Direct Access Controller mode of Cadence QSPI. This
> >>>>> allows MMIO access to SPI NOR flash providing better read performance.
> >>>>>
> >>>>> Signed-off-by: Vignesh R <vigneshr@ti.com>
> >>>>> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> >>>>
> >>>> I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
> >>>> 50MHz) and it seems to work fine.
> >>>>
> >>>> However, I had the impression it was a bit slower, not faster, although I
> >>>> haven't measured, and running at 50MHz with 4 data lines, reading the whole
> >>>> flash takes about 1.5 seconds only, so without actually measureing it, it's
> >>>> hard to tell if this performance is really worth the 440 bytes of extra code
> >>>> it adds?
> >>>>
> >>>
> >>> I should have noted in the commit msg that direct mode is used only when
> >>> AHB window is at least 8MB. Working with smaller window sizes would mean
> >>> code would fall back to indirect mode most of the time.
> >>> I see that socfgpa don't seem to have large AHB windows and therefore
> >>> would not use direct access mode.
> >>
> >> Aha. Yes, socfpga gen5 has a 1MB window only, I think. So the speed hasn't
> >> changed.
> >>
> >>>
> >>> On TI platforms its possible to use DMA to read data from memory mapped
> >>> region using mem to mem DMA channels which improves throughput by 5 times.
> >>
> >> Hmm, I'm using a QSPI at 50 MHz and transferring 31 MByte takes about 1.5
> >> seconds. The maximum speed of that setup would be 25 MByte/s without any
> >> overhead (1.24 seconds for 31 MB). There's no way I could achieve an improvement
> >> by 5 on my platform!
> >>
> >>>
> >>>> Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
> >>>> do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
> >>>> for me :-)
> >>>>
> >>>
> >>> Hmm, I read this as patch 1 brings down memory consumption by 320 bytes
> >>> and patch 2 adds 440 bytes (delta being +120 bytes for entire series).
> >>> I can see if size can be optimized, but would like to avoid #ifdef'ery
> >>> within the driver if possible.
> >>
> >> Well, I can only say I'm currently struggling to keep SPI-NOR and MMC enabled
> >> at the same time in socfpga_socrates (moving more code to DM). And even if it
> >> sounds like not much, 440 bytes *are* much for me at this point.
> >>
> >> I still would prefer having a Kconfig option for this that can be disabled
> >> for socfpga.
> >
> > Oh well, maybe just go ahead adding this and I'll try how it fits. We can alway
> > add reduced functionality later by checking for CONFIG_SPL_SPI_FLASH_TINY,
> > whicht might be a better config option to guard this than something cadence-qspi
> > specific.
> >
>
> Okay, thanks! Guarding with CONFIG_SPL_SPI_FLASH_TINY as and where
> required seems fair idea. BTW, it would be great if you could confirm
> Quad read works fine when spi-tx-bus-width = 1 and spi-rx-bus-width = 4
> as I suggested in other thread?

Yes, I'll do that when I find the time. Unfortunately, I can only do
that on the devices
at work, not at home. I also would like to check the memory-mapped code works on
socfpga by removing the 8MB size check, does that make sense?

Regards,
Simon

>
> Regards
> Vignesh
>
> > Regards,
> > Simon
> >
> >>
> >> Regards,
> >> Simon
> >>
> >>>
> >>> Regards
> >>> Vignesh
> >>>
> >>>> Regards,
> >>>> Simon
> >>>>
> >>>>
> >>>>> ---
> >>>>>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
> >>>>>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
> >>>>>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
> >>>>>  3 files changed, 87 insertions(+), 33 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
> >>>>> index 673a2e9a6c4c..6c98cbf39ae4 100644
> >>>>> --- a/drivers/spi/cadence_qspi.c
> >>>>> +++ b/drivers/spi/cadence_qspi.c
> >>>>> @@ -12,12 +12,13 @@
> >>>>>  #include <spi.h>
> >>>>>  #include <spi-mem.h>
> >>>>>  #include <linux/errno.h>
> >>>>> +#include <linux/sizes.h>
> >>>>>  #include "cadence_qspi.h"
> >>>>>
> >>>>>  #define CQSPI_STIG_READ                        0
> >>>>>  #define CQSPI_STIG_WRITE               1
> >>>>> -#define CQSPI_INDIRECT_READ            2
> >>>>> -#define CQSPI_INDIRECT_WRITE           3
> >>>>> +#define CQSPI_READ                     2
> >>>>> +#define CQSPI_WRITE                    3
> >>>>>
> >>>>>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
> >>>>>  {
> >>>>> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
> >>>>>
> >>>>>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> >>>>>  {
> >>>>> +       struct cadence_spi_platdata *plat = bus->platdata;
> >>>>>         struct cadence_spi_priv *priv = dev_get_priv(bus);
> >>>>>
> >>>>>         /* Disable QSPI */
> >>>>> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> >>>>>         /* Set SPI mode */
> >>>>>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
> >>>>>
> >>>>> +       /* Enable Direct Access Controller */
> >>>>> +       if (plat->use_dac_mode)
> >>>>> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
> >>>>> +
> >>>>>         /* Enable QSPI */
> >>>>>         cadence_qspi_apb_controller_enable(priv->regbase);
> >>>>>
> >>>>> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> >>>>>                 if (!op->addr.nbytes)
> >>>>>                         mode = CQSPI_STIG_READ;
> >>>>>                 else
> >>>>> -                       mode = CQSPI_INDIRECT_READ;
> >>>>> +                       mode = CQSPI_READ;
> >>>>>         } else {
> >>>>>                 if (!op->addr.nbytes || !op->data.buf.out)
> >>>>>                         mode = CQSPI_STIG_WRITE;
> >>>>>                 else
> >>>>> -                       mode = CQSPI_INDIRECT_WRITE;
> >>>>> +                       mode = CQSPI_WRITE;
> >>>>>         }
> >>>>>
> >>>>>         switch (mode) {
> >>>>> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> >>>>>         case CQSPI_STIG_WRITE:
> >>>>>                 err = cadence_qspi_apb_command_write(base, op);
> >>>>>                 break;
> >>>>> -       case CQSPI_INDIRECT_READ:
> >>>>> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
> >>>>> -               if (!err) {
> >>>>> -                       err = cadence_qspi_apb_indirect_read_execute
> >>>>> -                               (plat, op->data.nbytes, op->data.buf.in);
> >>>>> -               }
> >>>>> +       case CQSPI_READ:
> >>>>> +               err = cadence_qspi_apb_read_setup(plat, op);
> >>>>> +               if (!err)
> >>>>> +                       err = cadence_qspi_apb_read_execute(plat, op);
> >>>>>                 break;
> >>>>> -       case CQSPI_INDIRECT_WRITE:
> >>>>> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
> >>>>> -               if (!err) {
> >>>>> -                       err = cadence_qspi_apb_indirect_write_execute
> >>>>> -                       (plat, op->data.nbytes, op->data.buf.out);
> >>>>> -               }
> >>>>> +       case CQSPI_WRITE:
> >>>>> +               err = cadence_qspi_apb_write_setup(plat, op);
> >>>>> +               if (!err)
> >>>>> +                       err = cadence_qspi_apb_write_execute(plat, op);
> >>>>>                 break;
> >>>>>         default:
> >>>>>                 err = -1;
> >>>>> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
> >>>>>         ofnode subnode;
> >>>>>
> >>>>>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
> >>>>> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
> >>>>> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
> >>>>> +                       &plat->ahbsize);
> >>>>>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
> >>>>>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
> >>>>>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
> >>>>>         plat->trigger_address = dev_read_u32_default(bus,
> >>>>>                                                      "cdns,trigger-address",
> >>>>>                                                      0);
> >>>>> +       /* Use DAC mode only when MMIO window is at least 8M wide */
> >>>>> +       if (plat->ahbsize >= SZ_8M)
> >>>>> +               plat->use_dac_mode = true;
> >>>>>
> >>>>>         /* All other paramters are embedded in the child node */
> >>>>>         subnode = dev_read_first_subnode(bus);
> >>>>> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
> >>>>> index e655b027d788..619f0bed8efd 100644
> >>>>> --- a/drivers/spi/cadence_qspi.h
> >>>>> +++ b/drivers/spi/cadence_qspi.h
> >>>>> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
> >>>>>         u32             fifo_depth;
> >>>>>         u32             fifo_width;
> >>>>>         u32             trigger_address;
> >>>>> +       fdt_addr_t      ahbsize;
> >>>>> +       bool            use_dac_mode;
> >>>>>
> >>>>>         /* Flash parameters */
> >>>>>         u32             page_size;
> >>>>> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
> >>>>>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
> >>>>>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
> >>>>>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
> >>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
> >>>>>
> >>>>>  int cadence_qspi_apb_command_read(void *reg_base_addr,
> >>>>>                                   const struct spi_mem_op *op);
> >>>>>  int cadence_qspi_apb_command_write(void *reg_base_addr,
> >>>>>                                    const struct spi_mem_op *op);
> >>>>>
> >>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> >>>>> -       const struct spi_mem_op *op);
> >>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >>>>> -       unsigned int rxlen, u8 *rxbuf);
> >>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >>>>> -       const struct spi_mem_op *op);
> >>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >>>>> -       unsigned int txlen, const u8 *txbuf);
> >>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> >>>>> +                               const struct spi_mem_op *op);
> >>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> >>>>> +                                 const struct spi_mem_op *op);
> >>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> >>>>> +                                const struct spi_mem_op *op);
> >>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> >>>>> +                                  const struct spi_mem_op *op);
> >>>>>
> >>>>>  void cadence_qspi_apb_chipselect(void *reg_base,
> >>>>>         unsigned int chip_select, unsigned int decoder_enable);
> >>>>> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
> >>>>> index 8dd0495dfcf4..fd491f2c8104 100644
> >>>>> --- a/drivers/spi/cadence_qspi_apb.c
> >>>>> +++ b/drivers/spi/cadence_qspi_apb.c
> >>>>> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
> >>>>>         writel(reg, reg_base + CQSPI_REG_CONFIG);
> >>>>>  }
> >>>>>
> >>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
> >>>>> +{
> >>>>> +       unsigned int reg;
> >>>>> +
> >>>>> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
> >>>>> +       reg |= CQSPI_REG_CONFIG_DIRECT;
> >>>>> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
> >>>>> +}
> >>>>> +
> >>>>>  /* Return 1 if idle, otherwise return 0 (busy). */
> >>>>>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
> >>>>>  {
> >>>>> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
> >>>>>  }
> >>>>>
> >>>>>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
> >>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> >>>>> -       const struct spi_mem_op *op)
> >>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> >>>>> +                               const struct spi_mem_op *op)
> >>>>>  {
> >>>>>         unsigned int reg;
> >>>>>         unsigned int rd_reg;
> >>>>> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
> >>>>>         return -ETIMEDOUT;
> >>>>>  }
> >>>>>
> >>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >>>>> -       unsigned int n_rx, u8 *rxbuf)
> >>>>> +static int
> >>>>> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >>>>> +                                      unsigned int n_rx, u8 *rxbuf)
> >>>>>  {
> >>>>>         unsigned int remaining = n_rx;
> >>>>>         unsigned int bytes_to_read = 0;
> >>>>> @@ -639,9 +649,26 @@ failrd:
> >>>>>         return ret;
> >>>>>  }
> >>>>>
> >>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> >>>>> +                                 const struct spi_mem_op *op)
> >>>>> +{
> >>>>> +       u32 from = op->addr.val;
> >>>>> +       void *buf = op->data.buf.in;
> >>>>> +       size_t len = op->data.nbytes;
> >>>>> +
> >>>>> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
> >>>>> +               memcpy_fromio(buf, plat->ahbbase + from, len);
> >>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
> >>>>> +                       return -EIO;
> >>>>> +               return 0;
> >>>>> +       }
> >>>>> +
> >>>>> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
> >>>>> +}
> >>>>> +
> >>>>>  /* Opcode + Address (3/4 bytes) */
> >>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >>>>> -       const struct spi_mem_op *op)
> >>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> >>>>> +                                const struct spi_mem_op *op)
> >>>>>  {
> >>>>>         unsigned int reg;
> >>>>>
> >>>>> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >>>>>         return 0;
> >>>>>  }
> >>>>>
> >>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >>>>> -       unsigned int n_tx, const u8 *txbuf)
> >>>>> +static int
> >>>>> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >>>>> +                                       unsigned int n_tx, const u8 *txbuf)
> >>>>>  {
> >>>>>         unsigned int page_size = plat->page_size;
> >>>>>         unsigned int remaining = n_tx;
> >>>>> @@ -735,6 +763,23 @@ failwr:
> >>>>>         return ret;
> >>>>>  }
> >>>>>
> >>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> >>>>> +                                  const struct spi_mem_op *op)
> >>>>> +{
> >>>>> +       u32 to = op->addr.val;
> >>>>> +       const void *buf = op->data.buf.out;
> >>>>> +       size_t len = op->data.nbytes;
> >>>>> +
> >>>>> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
> >>>>> +               memcpy_toio(plat->ahbbase + to, buf, len);
> >>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
> >>>>> +                       return -EIO;
> >>>>> +               return 0;
> >>>>> +       }
> >>>>> +
> >>>>> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
> >>>>> +}
> >>>>> +
> >>>>>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
> >>>>>  {
> >>>>>         unsigned int reg;
> >>>>> --
> >>>>> 2.23.0
> >>>>>
> >>>
> >>> --
> >>> Regards
> >>> Vignesh
>
> --
> Regards
> Vignesh
Vignesh Raghavendra Oct. 23, 2019, 10:01 a.m. UTC | #7
On 18/10/19 6:12 PM, Simon Goldschmidt wrote:
> On Fri, Oct 18, 2019 at 2:40 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>>
>> Hi,
>>
>> On 18/10/19 2:34 PM, Simon Goldschmidt wrote:
>>> On Thu, Oct 17, 2019 at 2:55 PM Simon Goldschmidt
>>> <simon.k.r.goldschmidt@gmail.com> wrote:
>>>>
>>>> On Thu, Oct 17, 2019 at 2:44 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>>>>>
>>>>> Hi,
>>>>>
>>>>> On 17/10/19 5:09 PM, Simon Goldschmidt wrote:
>>>>>> On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>>>>>>>
>>>>>>> Add support for Direct Access Controller mode of Cadence QSPI. This
>>>>>>> allows MMIO access to SPI NOR flash providing better read performance.
>>>>>>>
>>>>>>> Signed-off-by: Vignesh R <vigneshr@ti.com>
>>>>>>> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
>>>>>>
>>>>>> I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
>>>>>> 50MHz) and it seems to work fine.
>>>>>>
>>>>>> However, I had the impression it was a bit slower, not faster, although I
>>>>>> haven't measured, and running at 50MHz with 4 data lines, reading the whole
>>>>>> flash takes about 1.5 seconds only, so without actually measureing it, it's
>>>>>> hard to tell if this performance is really worth the 440 bytes of extra code
>>>>>> it adds?
>>>>>>
>>>>>
>>>>> I should have noted in the commit msg that direct mode is used only when
>>>>> AHB window is at least 8MB. Working with smaller window sizes would mean
>>>>> code would fall back to indirect mode most of the time.
>>>>> I see that socfgpa don't seem to have large AHB windows and therefore
>>>>> would not use direct access mode.
>>>>
>>>> Aha. Yes, socfpga gen5 has a 1MB window only, I think. So the speed hasn't
>>>> changed.
>>>>
>>>>>
>>>>> On TI platforms its possible to use DMA to read data from memory mapped
>>>>> region using mem to mem DMA channels which improves throughput by 5 times.
>>>>
>>>> Hmm, I'm using a QSPI at 50 MHz and transferring 31 MByte takes about 1.5
>>>> seconds. The maximum speed of that setup would be 25 MByte/s without any
>>>> overhead (1.24 seconds for 31 MB). There's no way I could achieve an improvement
>>>> by 5 on my platform!
>>>>
>>>>>
>>>>>> Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
>>>>>> do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
>>>>>> for me :-)
>>>>>>
>>>>>
>>>>> Hmm, I read this as patch 1 brings down memory consumption by 320 bytes
>>>>> and patch 2 adds 440 bytes (delta being +120 bytes for entire series).
>>>>> I can see if size can be optimized, but would like to avoid #ifdef'ery
>>>>> within the driver if possible.
>>>>
>>>> Well, I can only say I'm currently struggling to keep SPI-NOR and MMC enabled
>>>> at the same time in socfpga_socrates (moving more code to DM). And even if it
>>>> sounds like not much, 440 bytes *are* much for me at this point.
>>>>
>>>> I still would prefer having a Kconfig option for this that can be disabled
>>>> for socfpga.
>>>
>>> Oh well, maybe just go ahead adding this and I'll try how it fits. We can alway
>>> add reduced functionality later by checking for CONFIG_SPL_SPI_FLASH_TINY,
>>> whicht might be a better config option to guard this than something cadence-qspi
>>> specific.
>>>
>>
>> Okay, thanks! Guarding with CONFIG_SPL_SPI_FLASH_TINY as and where
>> required seems fair idea. BTW, it would be great if you could confirm
>> Quad read works fine when spi-tx-bus-width = 1 and spi-rx-bus-width = 4
>> as I suggested in other thread?
> 
> Yes, I'll do that when I find the time. Unfortunately, I can only do
> that on the devices
> at work, not at home. 

Alright, thanks!

> I also would like to check the memory-mapped code works on
> socfpga by removing the 8MB size check, does that make sense?
> 

Well, in that case memory-mapped mode will be used for accessing data
within first 1MB of flash. Rest of the data would be accessed via
indirect mode.
I think if AHB window is small, then probably SoC designers did not
intend, controller to be used in DAC or memory-mapped mode.

Regards
Vignesh

> Regards,
> Simon
> 
>>
>> Regards
>> Vignesh
>>
>>> Regards,
>>> Simon
>>>
>>>>
>>>> Regards,
>>>> Simon
>>>>
>>>>>
>>>>> Regards
>>>>> Vignesh
>>>>>
>>>>>> Regards,
>>>>>> Simon
>>>>>>
>>>>>>
>>>>>>> ---
>>>>>>>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
>>>>>>>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
>>>>>>>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
>>>>>>>  3 files changed, 87 insertions(+), 33 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
>>>>>>> index 673a2e9a6c4c..6c98cbf39ae4 100644
>>>>>>> --- a/drivers/spi/cadence_qspi.c
>>>>>>> +++ b/drivers/spi/cadence_qspi.c
>>>>>>> @@ -12,12 +12,13 @@
>>>>>>>  #include <spi.h>
>>>>>>>  #include <spi-mem.h>
>>>>>>>  #include <linux/errno.h>
>>>>>>> +#include <linux/sizes.h>
>>>>>>>  #include "cadence_qspi.h"
>>>>>>>
>>>>>>>  #define CQSPI_STIG_READ                        0
>>>>>>>  #define CQSPI_STIG_WRITE               1
>>>>>>> -#define CQSPI_INDIRECT_READ            2
>>>>>>> -#define CQSPI_INDIRECT_WRITE           3
>>>>>>> +#define CQSPI_READ                     2
>>>>>>> +#define CQSPI_WRITE                    3
>>>>>>>
>>>>>>>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
>>>>>>>  {
>>>>>>> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
>>>>>>>
>>>>>>>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>>>>>>>  {
>>>>>>> +       struct cadence_spi_platdata *plat = bus->platdata;
>>>>>>>         struct cadence_spi_priv *priv = dev_get_priv(bus);
>>>>>>>
>>>>>>>         /* Disable QSPI */
>>>>>>> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
>>>>>>>         /* Set SPI mode */
>>>>>>>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
>>>>>>>
>>>>>>> +       /* Enable Direct Access Controller */
>>>>>>> +       if (plat->use_dac_mode)
>>>>>>> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
>>>>>>> +
>>>>>>>         /* Enable QSPI */
>>>>>>>         cadence_qspi_apb_controller_enable(priv->regbase);
>>>>>>>
>>>>>>> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>>>>>>>                 if (!op->addr.nbytes)
>>>>>>>                         mode = CQSPI_STIG_READ;
>>>>>>>                 else
>>>>>>> -                       mode = CQSPI_INDIRECT_READ;
>>>>>>> +                       mode = CQSPI_READ;
>>>>>>>         } else {
>>>>>>>                 if (!op->addr.nbytes || !op->data.buf.out)
>>>>>>>                         mode = CQSPI_STIG_WRITE;
>>>>>>>                 else
>>>>>>> -                       mode = CQSPI_INDIRECT_WRITE;
>>>>>>> +                       mode = CQSPI_WRITE;
>>>>>>>         }
>>>>>>>
>>>>>>>         switch (mode) {
>>>>>>> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
>>>>>>>         case CQSPI_STIG_WRITE:
>>>>>>>                 err = cadence_qspi_apb_command_write(base, op);
>>>>>>>                 break;
>>>>>>> -       case CQSPI_INDIRECT_READ:
>>>>>>> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
>>>>>>> -               if (!err) {
>>>>>>> -                       err = cadence_qspi_apb_indirect_read_execute
>>>>>>> -                               (plat, op->data.nbytes, op->data.buf.in);
>>>>>>> -               }
>>>>>>> +       case CQSPI_READ:
>>>>>>> +               err = cadence_qspi_apb_read_setup(plat, op);
>>>>>>> +               if (!err)
>>>>>>> +                       err = cadence_qspi_apb_read_execute(plat, op);
>>>>>>>                 break;
>>>>>>> -       case CQSPI_INDIRECT_WRITE:
>>>>>>> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
>>>>>>> -               if (!err) {
>>>>>>> -                       err = cadence_qspi_apb_indirect_write_execute
>>>>>>> -                       (plat, op->data.nbytes, op->data.buf.out);
>>>>>>> -               }
>>>>>>> +       case CQSPI_WRITE:
>>>>>>> +               err = cadence_qspi_apb_write_setup(plat, op);
>>>>>>> +               if (!err)
>>>>>>> +                       err = cadence_qspi_apb_write_execute(plat, op);
>>>>>>>                 break;
>>>>>>>         default:
>>>>>>>                 err = -1;
>>>>>>> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
>>>>>>>         ofnode subnode;
>>>>>>>
>>>>>>>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
>>>>>>> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
>>>>>>> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
>>>>>>> +                       &plat->ahbsize);
>>>>>>>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
>>>>>>>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
>>>>>>>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
>>>>>>>         plat->trigger_address = dev_read_u32_default(bus,
>>>>>>>                                                      "cdns,trigger-address",
>>>>>>>                                                      0);
>>>>>>> +       /* Use DAC mode only when MMIO window is at least 8M wide */
>>>>>>> +       if (plat->ahbsize >= SZ_8M)
>>>>>>> +               plat->use_dac_mode = true;
>>>>>>>
>>>>>>>         /* All other paramters are embedded in the child node */
>>>>>>>         subnode = dev_read_first_subnode(bus);
>>>>>>> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
>>>>>>> index e655b027d788..619f0bed8efd 100644
>>>>>>> --- a/drivers/spi/cadence_qspi.h
>>>>>>> +++ b/drivers/spi/cadence_qspi.h
>>>>>>> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
>>>>>>>         u32             fifo_depth;
>>>>>>>         u32             fifo_width;
>>>>>>>         u32             trigger_address;
>>>>>>> +       fdt_addr_t      ahbsize;
>>>>>>> +       bool            use_dac_mode;
>>>>>>>
>>>>>>>         /* Flash parameters */
>>>>>>>         u32             page_size;
>>>>>>> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
>>>>>>>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
>>>>>>>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
>>>>>>>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
>>>>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
>>>>>>>
>>>>>>>  int cadence_qspi_apb_command_read(void *reg_base_addr,
>>>>>>>                                   const struct spi_mem_op *op);
>>>>>>>  int cadence_qspi_apb_command_write(void *reg_base_addr,
>>>>>>>                                    const struct spi_mem_op *op);
>>>>>>>
>>>>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
>>>>>>> -       const struct spi_mem_op *op);
>>>>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>>>>>>> -       unsigned int rxlen, u8 *rxbuf);
>>>>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>>>>>>> -       const struct spi_mem_op *op);
>>>>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>>>>>>> -       unsigned int txlen, const u8 *txbuf);
>>>>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
>>>>>>> +                               const struct spi_mem_op *op);
>>>>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
>>>>>>> +                                 const struct spi_mem_op *op);
>>>>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
>>>>>>> +                                const struct spi_mem_op *op);
>>>>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
>>>>>>> +                                  const struct spi_mem_op *op);
>>>>>>>
>>>>>>>  void cadence_qspi_apb_chipselect(void *reg_base,
>>>>>>>         unsigned int chip_select, unsigned int decoder_enable);
>>>>>>> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
>>>>>>> index 8dd0495dfcf4..fd491f2c8104 100644
>>>>>>> --- a/drivers/spi/cadence_qspi_apb.c
>>>>>>> +++ b/drivers/spi/cadence_qspi_apb.c
>>>>>>> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
>>>>>>>         writel(reg, reg_base + CQSPI_REG_CONFIG);
>>>>>>>  }
>>>>>>>
>>>>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
>>>>>>> +{
>>>>>>> +       unsigned int reg;
>>>>>>> +
>>>>>>> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
>>>>>>> +       reg |= CQSPI_REG_CONFIG_DIRECT;
>>>>>>> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
>>>>>>> +}
>>>>>>> +
>>>>>>>  /* Return 1 if idle, otherwise return 0 (busy). */
>>>>>>>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
>>>>>>>  {
>>>>>>> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
>>>>>>>  }
>>>>>>>
>>>>>>>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
>>>>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
>>>>>>> -       const struct spi_mem_op *op)
>>>>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
>>>>>>> +                               const struct spi_mem_op *op)
>>>>>>>  {
>>>>>>>         unsigned int reg;
>>>>>>>         unsigned int rd_reg;
>>>>>>> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
>>>>>>>         return -ETIMEDOUT;
>>>>>>>  }
>>>>>>>
>>>>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>>>>>>> -       unsigned int n_rx, u8 *rxbuf)
>>>>>>> +static int
>>>>>>> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
>>>>>>> +                                      unsigned int n_rx, u8 *rxbuf)
>>>>>>>  {
>>>>>>>         unsigned int remaining = n_rx;
>>>>>>>         unsigned int bytes_to_read = 0;
>>>>>>> @@ -639,9 +649,26 @@ failrd:
>>>>>>>         return ret;
>>>>>>>  }
>>>>>>>
>>>>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
>>>>>>> +                                 const struct spi_mem_op *op)
>>>>>>> +{
>>>>>>> +       u32 from = op->addr.val;
>>>>>>> +       void *buf = op->data.buf.in;
>>>>>>> +       size_t len = op->data.nbytes;
>>>>>>> +
>>>>>>> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
>>>>>>> +               memcpy_fromio(buf, plat->ahbbase + from, len);
>>>>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
>>>>>>> +                       return -EIO;
>>>>>>> +               return 0;
>>>>>>> +       }
>>>>>>> +
>>>>>>> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
>>>>>>> +}
>>>>>>> +
>>>>>>>  /* Opcode + Address (3/4 bytes) */
>>>>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>>>>>>> -       const struct spi_mem_op *op)
>>>>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
>>>>>>> +                                const struct spi_mem_op *op)
>>>>>>>  {
>>>>>>>         unsigned int reg;
>>>>>>>
>>>>>>> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
>>>>>>>         return 0;
>>>>>>>  }
>>>>>>>
>>>>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>>>>>>> -       unsigned int n_tx, const u8 *txbuf)
>>>>>>> +static int
>>>>>>> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
>>>>>>> +                                       unsigned int n_tx, const u8 *txbuf)
>>>>>>>  {
>>>>>>>         unsigned int page_size = plat->page_size;
>>>>>>>         unsigned int remaining = n_tx;
>>>>>>> @@ -735,6 +763,23 @@ failwr:
>>>>>>>         return ret;
>>>>>>>  }
>>>>>>>
>>>>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
>>>>>>> +                                  const struct spi_mem_op *op)
>>>>>>> +{
>>>>>>> +       u32 to = op->addr.val;
>>>>>>> +       const void *buf = op->data.buf.out;
>>>>>>> +       size_t len = op->data.nbytes;
>>>>>>> +
>>>>>>> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
>>>>>>> +               memcpy_toio(plat->ahbbase + to, buf, len);
>>>>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
>>>>>>> +                       return -EIO;
>>>>>>> +               return 0;
>>>>>>> +       }
>>>>>>> +
>>>>>>> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
>>>>>>> +}
>>>>>>> +
>>>>>>>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
>>>>>>>  {
>>>>>>>         unsigned int reg;
>>>>>>> --
>>>>>>> 2.23.0
>>>>>>>
>>>>>
>>>>> --
>>>>> Regards
>>>>> Vignesh
>>
>> --
>> Regards
>> Vignesh
Simon Goldschmidt Oct. 23, 2019, 10:06 a.m. UTC | #8
On Wed, Oct 23, 2019 at 12:00 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
>
>
>
> On 18/10/19 6:12 PM, Simon Goldschmidt wrote:
> > On Fri, Oct 18, 2019 at 2:40 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> >>
> >> Hi,
> >>
> >> On 18/10/19 2:34 PM, Simon Goldschmidt wrote:
> >>> On Thu, Oct 17, 2019 at 2:55 PM Simon Goldschmidt
> >>> <simon.k.r.goldschmidt@gmail.com> wrote:
> >>>>
> >>>> On Thu, Oct 17, 2019 at 2:44 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> >>>>>
> >>>>> Hi,
> >>>>>
> >>>>> On 17/10/19 5:09 PM, Simon Goldschmidt wrote:
> >>>>>> On Mon, Oct 14, 2019 at 3:27 PM Vignesh Raghavendra <vigneshr@ti.com> wrote:
> >>>>>>>
> >>>>>>> Add support for Direct Access Controller mode of Cadence QSPI. This
> >>>>>>> allows MMIO access to SPI NOR flash providing better read performance.
> >>>>>>>
> >>>>>>> Signed-off-by: Vignesh R <vigneshr@ti.com>
> >>>>>>> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> >>>>>>
> >>>>>> I've tested this on my socfpga cyclone5 board with an mt25ql256a (running at
> >>>>>> 50MHz) and it seems to work fine.
> >>>>>>
> >>>>>> However, I had the impression it was a bit slower, not faster, although I
> >>>>>> haven't measured, and running at 50MHz with 4 data lines, reading the whole
> >>>>>> flash takes about 1.5 seconds only, so without actually measureing it, it's
> >>>>>> hard to tell if this performance is really worth the 440 bytes of extra code
> >>>>>> it adds?
> >>>>>>
> >>>>>
> >>>>> I should have noted in the commit msg that direct mode is used only when
> >>>>> AHB window is at least 8MB. Working with smaller window sizes would mean
> >>>>> code would fall back to indirect mode most of the time.
> >>>>> I see that socfgpa don't seem to have large AHB windows and therefore
> >>>>> would not use direct access mode.
> >>>>
> >>>> Aha. Yes, socfpga gen5 has a 1MB window only, I think. So the speed hasn't
> >>>> changed.
> >>>>
> >>>>>
> >>>>> On TI platforms its possible to use DMA to read data from memory mapped
> >>>>> region using mem to mem DMA channels which improves throughput by 5 times.
> >>>>
> >>>> Hmm, I'm using a QSPI at 50 MHz and transferring 31 MByte takes about 1.5
> >>>> seconds. The maximum speed of that setup would be 25 MByte/s without any
> >>>> overhead (1.24 seconds for 31 MB). There's no way I could achieve an improvement
> >>>> by 5 on my platform!
> >>>>
> >>>>>
> >>>>>> Note that I'm already short on RAM in SPL (socfpga gen5), so these 440 bytes
> >>>>>> do hurt. Note that patch 1/2 of this series reduced SPL code size by 320 bytes
> >>>>>> for me :-)
> >>>>>>
> >>>>>
> >>>>> Hmm, I read this as patch 1 brings down memory consumption by 320 bytes
> >>>>> and patch 2 adds 440 bytes (delta being +120 bytes for entire series).
> >>>>> I can see if size can be optimized, but would like to avoid #ifdef'ery
> >>>>> within the driver if possible.
> >>>>
> >>>> Well, I can only say I'm currently struggling to keep SPI-NOR and MMC enabled
> >>>> at the same time in socfpga_socrates (moving more code to DM). And even if it
> >>>> sounds like not much, 440 bytes *are* much for me at this point.
> >>>>
> >>>> I still would prefer having a Kconfig option for this that can be disabled
> >>>> for socfpga.
> >>>
> >>> Oh well, maybe just go ahead adding this and I'll try how it fits. We can alway
> >>> add reduced functionality later by checking for CONFIG_SPL_SPI_FLASH_TINY,
> >>> whicht might be a better config option to guard this than something cadence-qspi
> >>> specific.
> >>>
> >>
> >> Okay, thanks! Guarding with CONFIG_SPL_SPI_FLASH_TINY as and where
> >> required seems fair idea. BTW, it would be great if you could confirm
> >> Quad read works fine when spi-tx-bus-width = 1 and spi-rx-bus-width = 4
> >> as I suggested in other thread?
> >
> > Yes, I'll do that when I find the time. Unfortunately, I can only do
> > that on the devices
> > at work, not at home.
>
> Alright, thanks!
>
> > I also would like to check the memory-mapped code works on
> > socfpga by removing the 8MB size check, does that make sense?
> >
>
> Well, in that case memory-mapped mode will be used for accessing data
> within first 1MB of flash. Rest of the data would be accessed via
> indirect mode.
> I think if AHB window is small, then probably SoC designers did not
> intend, controller to be used in DAC or memory-mapped mode.

Oh, well I thought you would then switch the window (register named
'remapaddr' for cyclone5 qspi)? I admit, I haven't looked at that code...

Would it make sense to implement that? Sequential reads should
still benefit even if switching the window every X MB.

Regards,
Simon


>
> Regards
> Vignesh
>
> > Regards,
> > Simon
> >
> >>
> >> Regards
> >> Vignesh
> >>
> >>> Regards,
> >>> Simon
> >>>
> >>>>
> >>>> Regards,
> >>>> Simon
> >>>>
> >>>>>
> >>>>> Regards
> >>>>> Vignesh
> >>>>>
> >>>>>> Regards,
> >>>>>> Simon
> >>>>>>
> >>>>>>
> >>>>>>> ---
> >>>>>>>  drivers/spi/cadence_qspi.c     | 40 ++++++++++++----------
> >>>>>>>  drivers/spi/cadence_qspi.h     | 19 ++++++-----
> >>>>>>>  drivers/spi/cadence_qspi_apb.c | 61 +++++++++++++++++++++++++++++-----
> >>>>>>>  3 files changed, 87 insertions(+), 33 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
> >>>>>>> index 673a2e9a6c4c..6c98cbf39ae4 100644
> >>>>>>> --- a/drivers/spi/cadence_qspi.c
> >>>>>>> +++ b/drivers/spi/cadence_qspi.c
> >>>>>>> @@ -12,12 +12,13 @@
> >>>>>>>  #include <spi.h>
> >>>>>>>  #include <spi-mem.h>
> >>>>>>>  #include <linux/errno.h>
> >>>>>>> +#include <linux/sizes.h>
> >>>>>>>  #include "cadence_qspi.h"
> >>>>>>>
> >>>>>>>  #define CQSPI_STIG_READ                        0
> >>>>>>>  #define CQSPI_STIG_WRITE               1
> >>>>>>> -#define CQSPI_INDIRECT_READ            2
> >>>>>>> -#define CQSPI_INDIRECT_WRITE           3
> >>>>>>> +#define CQSPI_READ                     2
> >>>>>>> +#define CQSPI_WRITE                    3
> >>>>>>>
> >>>>>>>  static int cadence_spi_write_speed(struct udevice *bus, uint hz)
> >>>>>>>  {
> >>>>>>> @@ -189,6 +190,7 @@ static int cadence_spi_remove(struct udevice *dev)
> >>>>>>>
> >>>>>>>  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> >>>>>>>  {
> >>>>>>> +       struct cadence_spi_platdata *plat = bus->platdata;
> >>>>>>>         struct cadence_spi_priv *priv = dev_get_priv(bus);
> >>>>>>>
> >>>>>>>         /* Disable QSPI */
> >>>>>>> @@ -197,6 +199,10 @@ static int cadence_spi_set_mode(struct udevice *bus, uint mode)
> >>>>>>>         /* Set SPI mode */
> >>>>>>>         cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
> >>>>>>>
> >>>>>>> +       /* Enable Direct Access Controller */
> >>>>>>> +       if (plat->use_dac_mode)
> >>>>>>> +               cadence_qspi_apb_dac_mode_enable(priv->regbase);
> >>>>>>> +
> >>>>>>>         /* Enable QSPI */
> >>>>>>>         cadence_qspi_apb_controller_enable(priv->regbase);
> >>>>>>>
> >>>>>>> @@ -221,12 +227,12 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> >>>>>>>                 if (!op->addr.nbytes)
> >>>>>>>                         mode = CQSPI_STIG_READ;
> >>>>>>>                 else
> >>>>>>> -                       mode = CQSPI_INDIRECT_READ;
> >>>>>>> +                       mode = CQSPI_READ;
> >>>>>>>         } else {
> >>>>>>>                 if (!op->addr.nbytes || !op->data.buf.out)
> >>>>>>>                         mode = CQSPI_STIG_WRITE;
> >>>>>>>                 else
> >>>>>>> -                       mode = CQSPI_INDIRECT_WRITE;
> >>>>>>> +                       mode = CQSPI_WRITE;
> >>>>>>>         }
> >>>>>>>
> >>>>>>>         switch (mode) {
> >>>>>>> @@ -236,19 +242,15 @@ static int cadence_spi_mem_exec_op(struct spi_slave *spi,
> >>>>>>>         case CQSPI_STIG_WRITE:
> >>>>>>>                 err = cadence_qspi_apb_command_write(base, op);
> >>>>>>>                 break;
> >>>>>>> -       case CQSPI_INDIRECT_READ:
> >>>>>>> -               err = cadence_qspi_apb_indirect_read_setup(plat, op);
> >>>>>>> -               if (!err) {
> >>>>>>> -                       err = cadence_qspi_apb_indirect_read_execute
> >>>>>>> -                               (plat, op->data.nbytes, op->data.buf.in);
> >>>>>>> -               }
> >>>>>>> +       case CQSPI_READ:
> >>>>>>> +               err = cadence_qspi_apb_read_setup(plat, op);
> >>>>>>> +               if (!err)
> >>>>>>> +                       err = cadence_qspi_apb_read_execute(plat, op);
> >>>>>>>                 break;
> >>>>>>> -       case CQSPI_INDIRECT_WRITE:
> >>>>>>> -               err = cadence_qspi_apb_indirect_write_setup(plat, op);
> >>>>>>> -               if (!err) {
> >>>>>>> -                       err = cadence_qspi_apb_indirect_write_execute
> >>>>>>> -                       (plat, op->data.nbytes, op->data.buf.out);
> >>>>>>> -               }
> >>>>>>> +       case CQSPI_WRITE:
> >>>>>>> +               err = cadence_qspi_apb_write_setup(plat, op);
> >>>>>>> +               if (!err)
> >>>>>>> +                       err = cadence_qspi_apb_write_execute(plat, op);
> >>>>>>>                 break;
> >>>>>>>         default:
> >>>>>>>                 err = -1;
> >>>>>>> @@ -264,13 +266,17 @@ static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
> >>>>>>>         ofnode subnode;
> >>>>>>>
> >>>>>>>         plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
> >>>>>>> -       plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
> >>>>>>> +       plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
> >>>>>>> +                       &plat->ahbsize);
> >>>>>>>         plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
> >>>>>>>         plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
> >>>>>>>         plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
> >>>>>>>         plat->trigger_address = dev_read_u32_default(bus,
> >>>>>>>                                                      "cdns,trigger-address",
> >>>>>>>                                                      0);
> >>>>>>> +       /* Use DAC mode only when MMIO window is at least 8M wide */
> >>>>>>> +       if (plat->ahbsize >= SZ_8M)
> >>>>>>> +               plat->use_dac_mode = true;
> >>>>>>>
> >>>>>>>         /* All other paramters are embedded in the child node */
> >>>>>>>         subnode = dev_read_first_subnode(bus);
> >>>>>>> diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
> >>>>>>> index e655b027d788..619f0bed8efd 100644
> >>>>>>> --- a/drivers/spi/cadence_qspi.h
> >>>>>>> +++ b/drivers/spi/cadence_qspi.h
> >>>>>>> @@ -23,6 +23,8 @@ struct cadence_spi_platdata {
> >>>>>>>         u32             fifo_depth;
> >>>>>>>         u32             fifo_width;
> >>>>>>>         u32             trigger_address;
> >>>>>>> +       fdt_addr_t      ahbsize;
> >>>>>>> +       bool            use_dac_mode;
> >>>>>>>
> >>>>>>>         /* Flash parameters */
> >>>>>>>         u32             page_size;
> >>>>>>> @@ -52,20 +54,21 @@ struct cadence_spi_priv {
> >>>>>>>  void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
> >>>>>>>  void cadence_qspi_apb_controller_enable(void *reg_base_addr);
> >>>>>>>  void cadence_qspi_apb_controller_disable(void *reg_base_addr);
> >>>>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base);
> >>>>>>>
> >>>>>>>  int cadence_qspi_apb_command_read(void *reg_base_addr,
> >>>>>>>                                   const struct spi_mem_op *op);
> >>>>>>>  int cadence_qspi_apb_command_write(void *reg_base_addr,
> >>>>>>>                                    const struct spi_mem_op *op);
> >>>>>>>
> >>>>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> >>>>>>> -       const struct spi_mem_op *op);
> >>>>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >>>>>>> -       unsigned int rxlen, u8 *rxbuf);
> >>>>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >>>>>>> -       const struct spi_mem_op *op);
> >>>>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >>>>>>> -       unsigned int txlen, const u8 *txbuf);
> >>>>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> >>>>>>> +                               const struct spi_mem_op *op);
> >>>>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> >>>>>>> +                                 const struct spi_mem_op *op);
> >>>>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> >>>>>>> +                                const struct spi_mem_op *op);
> >>>>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> >>>>>>> +                                  const struct spi_mem_op *op);
> >>>>>>>
> >>>>>>>  void cadence_qspi_apb_chipselect(void *reg_base,
> >>>>>>>         unsigned int chip_select, unsigned int decoder_enable);
> >>>>>>> diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
> >>>>>>> index 8dd0495dfcf4..fd491f2c8104 100644
> >>>>>>> --- a/drivers/spi/cadence_qspi_apb.c
> >>>>>>> +++ b/drivers/spi/cadence_qspi_apb.c
> >>>>>>> @@ -189,6 +189,15 @@ void cadence_qspi_apb_controller_disable(void *reg_base)
> >>>>>>>         writel(reg, reg_base + CQSPI_REG_CONFIG);
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +void cadence_qspi_apb_dac_mode_enable(void *reg_base)
> >>>>>>> +{
> >>>>>>> +       unsigned int reg;
> >>>>>>> +
> >>>>>>> +       reg = readl(reg_base + CQSPI_REG_CONFIG);
> >>>>>>> +       reg |= CQSPI_REG_CONFIG_DIRECT;
> >>>>>>> +       writel(reg, reg_base + CQSPI_REG_CONFIG);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  /* Return 1 if idle, otherwise return 0 (busy). */
> >>>>>>>  static unsigned int cadence_qspi_wait_idle(void *reg_base)
> >>>>>>>  {
> >>>>>>> @@ -512,8 +521,8 @@ int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
> >>>>>>>  }
> >>>>>>>
> >>>>>>>  /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
> >>>>>>> -int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
> >>>>>>> -       const struct spi_mem_op *op)
> >>>>>>> +int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
> >>>>>>> +                               const struct spi_mem_op *op)
> >>>>>>>  {
> >>>>>>>         unsigned int reg;
> >>>>>>>         unsigned int rd_reg;
> >>>>>>> @@ -577,8 +586,9 @@ static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
> >>>>>>>         return -ETIMEDOUT;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> -int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >>>>>>> -       unsigned int n_rx, u8 *rxbuf)
> >>>>>>> +static int
> >>>>>>> +cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
> >>>>>>> +                                      unsigned int n_rx, u8 *rxbuf)
> >>>>>>>  {
> >>>>>>>         unsigned int remaining = n_rx;
> >>>>>>>         unsigned int bytes_to_read = 0;
> >>>>>>> @@ -639,9 +649,26 @@ failrd:
> >>>>>>>         return ret;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
> >>>>>>> +                                 const struct spi_mem_op *op)
> >>>>>>> +{
> >>>>>>> +       u32 from = op->addr.val;
> >>>>>>> +       void *buf = op->data.buf.in;
> >>>>>>> +       size_t len = op->data.nbytes;
> >>>>>>> +
> >>>>>>> +       if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
> >>>>>>> +               memcpy_fromio(buf, plat->ahbbase + from, len);
> >>>>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
> >>>>>>> +                       return -EIO;
> >>>>>>> +               return 0;
> >>>>>>> +       }
> >>>>>>> +
> >>>>>>> +       return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  /* Opcode + Address (3/4 bytes) */
> >>>>>>> -int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >>>>>>> -       const struct spi_mem_op *op)
> >>>>>>> +int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
> >>>>>>> +                                const struct spi_mem_op *op)
> >>>>>>>  {
> >>>>>>>         unsigned int reg;
> >>>>>>>
> >>>>>>> @@ -662,8 +689,9 @@ int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
> >>>>>>>         return 0;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> -int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >>>>>>> -       unsigned int n_tx, const u8 *txbuf)
> >>>>>>> +static int
> >>>>>>> +cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
> >>>>>>> +                                       unsigned int n_tx, const u8 *txbuf)
> >>>>>>>  {
> >>>>>>>         unsigned int page_size = plat->page_size;
> >>>>>>>         unsigned int remaining = n_tx;
> >>>>>>> @@ -735,6 +763,23 @@ failwr:
> >>>>>>>         return ret;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
> >>>>>>> +                                  const struct spi_mem_op *op)
> >>>>>>> +{
> >>>>>>> +       u32 to = op->addr.val;
> >>>>>>> +       const void *buf = op->data.buf.out;
> >>>>>>> +       size_t len = op->data.nbytes;
> >>>>>>> +
> >>>>>>> +       if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
> >>>>>>> +               memcpy_toio(plat->ahbbase + to, buf, len);
> >>>>>>> +               if (!cadence_qspi_wait_idle(plat->regbase))
> >>>>>>> +                       return -EIO;
> >>>>>>> +               return 0;
> >>>>>>> +       }
> >>>>>>> +
> >>>>>>> +       return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
> >>>>>>>  {
> >>>>>>>         unsigned int reg;
> >>>>>>> --
> >>>>>>> 2.23.0
> >>>>>>>
> >>>>>
> >>>>> --
> >>>>> Regards
> >>>>> Vignesh
> >>
> >> --
> >> Regards
> >> Vignesh
>
> --
> Regards
> Vignesh

Patch
diff mbox series

diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
index 673a2e9a6c4c..6c98cbf39ae4 100644
--- a/drivers/spi/cadence_qspi.c
+++ b/drivers/spi/cadence_qspi.c
@@ -12,12 +12,13 @@ 
 #include <spi.h>
 #include <spi-mem.h>
 #include <linux/errno.h>
+#include <linux/sizes.h>
 #include "cadence_qspi.h"
 
 #define CQSPI_STIG_READ			0
 #define CQSPI_STIG_WRITE		1
-#define CQSPI_INDIRECT_READ		2
-#define CQSPI_INDIRECT_WRITE		3
+#define CQSPI_READ			2
+#define CQSPI_WRITE			3
 
 static int cadence_spi_write_speed(struct udevice *bus, uint hz)
 {
@@ -189,6 +190,7 @@  static int cadence_spi_remove(struct udevice *dev)
 
 static int cadence_spi_set_mode(struct udevice *bus, uint mode)
 {
+	struct cadence_spi_platdata *plat = bus->platdata;
 	struct cadence_spi_priv *priv = dev_get_priv(bus);
 
 	/* Disable QSPI */
@@ -197,6 +199,10 @@  static int cadence_spi_set_mode(struct udevice *bus, uint mode)
 	/* Set SPI mode */
 	cadence_qspi_apb_set_clk_mode(priv->regbase, mode);
 
+	/* Enable Direct Access Controller */
+	if (plat->use_dac_mode)
+		cadence_qspi_apb_dac_mode_enable(priv->regbase);
+
 	/* Enable QSPI */
 	cadence_qspi_apb_controller_enable(priv->regbase);
 
@@ -221,12 +227,12 @@  static int cadence_spi_mem_exec_op(struct spi_slave *spi,
 		if (!op->addr.nbytes)
 			mode = CQSPI_STIG_READ;
 		else
-			mode = CQSPI_INDIRECT_READ;
+			mode = CQSPI_READ;
 	} else {
 		if (!op->addr.nbytes || !op->data.buf.out)
 			mode = CQSPI_STIG_WRITE;
 		else
-			mode = CQSPI_INDIRECT_WRITE;
+			mode = CQSPI_WRITE;
 	}
 
 	switch (mode) {
@@ -236,19 +242,15 @@  static int cadence_spi_mem_exec_op(struct spi_slave *spi,
 	case CQSPI_STIG_WRITE:
 		err = cadence_qspi_apb_command_write(base, op);
 		break;
-	case CQSPI_INDIRECT_READ:
-		err = cadence_qspi_apb_indirect_read_setup(plat, op);
-		if (!err) {
-			err = cadence_qspi_apb_indirect_read_execute
-				(plat, op->data.nbytes, op->data.buf.in);
-		}
+	case CQSPI_READ:
+		err = cadence_qspi_apb_read_setup(plat, op);
+		if (!err)
+			err = cadence_qspi_apb_read_execute(plat, op);
 		break;
-	case CQSPI_INDIRECT_WRITE:
-		err = cadence_qspi_apb_indirect_write_setup(plat, op);
-		if (!err) {
-			err = cadence_qspi_apb_indirect_write_execute
-			(plat, op->data.nbytes, op->data.buf.out);
-		}
+	case CQSPI_WRITE:
+		err = cadence_qspi_apb_write_setup(plat, op);
+		if (!err)
+			err = cadence_qspi_apb_write_execute(plat, op);
 		break;
 	default:
 		err = -1;
@@ -264,13 +266,17 @@  static int cadence_spi_ofdata_to_platdata(struct udevice *bus)
 	ofnode subnode;
 
 	plat->regbase = (void *)devfdt_get_addr_index(bus, 0);
-	plat->ahbbase = (void *)devfdt_get_addr_index(bus, 1);
+	plat->ahbbase = (void *)devfdt_get_addr_size_index(bus, 1,
+			&plat->ahbsize);
 	plat->is_decoded_cs = dev_read_bool(bus, "cdns,is-decoded-cs");
 	plat->fifo_depth = dev_read_u32_default(bus, "cdns,fifo-depth", 128);
 	plat->fifo_width = dev_read_u32_default(bus, "cdns,fifo-width", 4);
 	plat->trigger_address = dev_read_u32_default(bus,
 						     "cdns,trigger-address",
 						     0);
+	/* Use DAC mode only when MMIO window is at least 8M wide */
+	if (plat->ahbsize >= SZ_8M)
+		plat->use_dac_mode = true;
 
 	/* All other paramters are embedded in the child node */
 	subnode = dev_read_first_subnode(bus);
diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
index e655b027d788..619f0bed8efd 100644
--- a/drivers/spi/cadence_qspi.h
+++ b/drivers/spi/cadence_qspi.h
@@ -23,6 +23,8 @@  struct cadence_spi_platdata {
 	u32		fifo_depth;
 	u32		fifo_width;
 	u32		trigger_address;
+	fdt_addr_t	ahbsize;
+	bool		use_dac_mode;
 
 	/* Flash parameters */
 	u32		page_size;
@@ -52,20 +54,21 @@  struct cadence_spi_priv {
 void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat);
 void cadence_qspi_apb_controller_enable(void *reg_base_addr);
 void cadence_qspi_apb_controller_disable(void *reg_base_addr);
+void cadence_qspi_apb_dac_mode_enable(void *reg_base);
 
 int cadence_qspi_apb_command_read(void *reg_base_addr,
 				  const struct spi_mem_op *op);
 int cadence_qspi_apb_command_write(void *reg_base_addr,
 				   const struct spi_mem_op *op);
 
-int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
-	const struct spi_mem_op *op);
-int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
-	unsigned int rxlen, u8 *rxbuf);
-int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
-	const struct spi_mem_op *op);
-int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
-	unsigned int txlen, const u8 *txbuf);
+int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
+				const struct spi_mem_op *op);
+int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
+				  const struct spi_mem_op *op);
+int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
+				 const struct spi_mem_op *op);
+int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
+				   const struct spi_mem_op *op);
 
 void cadence_qspi_apb_chipselect(void *reg_base,
 	unsigned int chip_select, unsigned int decoder_enable);
diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
index 8dd0495dfcf4..fd491f2c8104 100644
--- a/drivers/spi/cadence_qspi_apb.c
+++ b/drivers/spi/cadence_qspi_apb.c
@@ -189,6 +189,15 @@  void cadence_qspi_apb_controller_disable(void *reg_base)
 	writel(reg, reg_base + CQSPI_REG_CONFIG);
 }
 
+void cadence_qspi_apb_dac_mode_enable(void *reg_base)
+{
+	unsigned int reg;
+
+	reg = readl(reg_base + CQSPI_REG_CONFIG);
+	reg |= CQSPI_REG_CONFIG_DIRECT;
+	writel(reg, reg_base + CQSPI_REG_CONFIG);
+}
+
 /* Return 1 if idle, otherwise return 0 (busy). */
 static unsigned int cadence_qspi_wait_idle(void *reg_base)
 {
@@ -512,8 +521,8 @@  int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
 }
 
 /* Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes) */
-int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
-	const struct spi_mem_op *op)
+int cadence_qspi_apb_read_setup(struct cadence_spi_platdata *plat,
+				const struct spi_mem_op *op)
 {
 	unsigned int reg;
 	unsigned int rd_reg;
@@ -577,8 +586,9 @@  static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
 	return -ETIMEDOUT;
 }
 
-int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
-	unsigned int n_rx, u8 *rxbuf)
+static int
+cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
+				       unsigned int n_rx, u8 *rxbuf)
 {
 	unsigned int remaining = n_rx;
 	unsigned int bytes_to_read = 0;
@@ -639,9 +649,26 @@  failrd:
 	return ret;
 }
 
+int cadence_qspi_apb_read_execute(struct cadence_spi_platdata *plat,
+				  const struct spi_mem_op *op)
+{
+	u32 from = op->addr.val;
+	void *buf = op->data.buf.in;
+	size_t len = op->data.nbytes;
+
+	if (plat->use_dac_mode && (from + len < plat->ahbsize)) {
+		memcpy_fromio(buf, plat->ahbbase + from, len);
+		if (!cadence_qspi_wait_idle(plat->regbase))
+			return -EIO;
+		return 0;
+	}
+
+	return cadence_qspi_apb_indirect_read_execute(plat, len, buf);
+}
+
 /* Opcode + Address (3/4 bytes) */
-int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
-	const struct spi_mem_op *op)
+int cadence_qspi_apb_write_setup(struct cadence_spi_platdata *plat,
+				 const struct spi_mem_op *op)
 {
 	unsigned int reg;
 
@@ -662,8 +689,9 @@  int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
 	return 0;
 }
 
-int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
-	unsigned int n_tx, const u8 *txbuf)
+static int
+cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
+					unsigned int n_tx, const u8 *txbuf)
 {
 	unsigned int page_size = plat->page_size;
 	unsigned int remaining = n_tx;
@@ -735,6 +763,23 @@  failwr:
 	return ret;
 }
 
+int cadence_qspi_apb_write_execute(struct cadence_spi_platdata *plat,
+				   const struct spi_mem_op *op)
+{
+	u32 to = op->addr.val;
+	const void *buf = op->data.buf.out;
+	size_t len = op->data.nbytes;
+
+	if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
+		memcpy_toio(plat->ahbbase + to, buf, len);
+		if (!cadence_qspi_wait_idle(plat->regbase))
+			return -EIO;
+		return 0;
+	}
+
+	return cadence_qspi_apb_indirect_write_execute(plat, len, buf);
+}
+
 void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
 {
 	unsigned int reg;