diff mbox series

[v2,13/17] spi: mpc8xx: Use 16 bit mode for large transfers with even size

Message ID 9195d08a754418254471b313d9390bc8773a163b.1713160866.git.christophe.leroy@csgroup.eu
State Accepted
Commit dff36805c7fca1edabf3a8be94a8431928e3aec8
Delegated to: Tom Rini
Headers show
Series Misc changes for CSSI boards | expand

Commit Message

Christophe Leroy April 15, 2024, 6:07 a.m. UTC
On CPM, the RISC core is a lot more efficient when doing transfers
in 16-bit chunks than in 8-bit chunks, but unfortunately the
words need to be byte-swapped.

So, for large transfers with an even size, allocate a temporary
buffer and byte-swap data before and after transfer.

This change allows setting a higher transfer speed. For instance,
on an MPC 8xx (CPM1 comms RISC processor), the documentation states
that a transfer in byte mode at 1 kbit/s uses 0.200% of CPM load
at 25 MHz, while a word transfer at the same speed uses 0.032%
of CPM load. This means the speed can be 6 times higher in
word mode for the same CPM load.

For small transfers, the load reduction is not worth the CPU load
required to allocate the temporary buffer, so do it only when data
size is over 64 bytes.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/spi/mpc8xx_spi.c | 44 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/spi/mpc8xx_spi.c b/drivers/spi/mpc8xx_spi.c
index a193ac711b..b1abfbf4fc 100644
--- a/drivers/spi/mpc8xx_spi.c
+++ b/drivers/spi/mpc8xx_spi.c
@@ -18,6 +18,7 @@ 
 
 #include <common.h>
 #include <dm.h>
+#include <malloc.h>
 #include <mpc8xx.h>
 #include <spi.h>
 #include <linux/delay.h>
@@ -30,6 +31,7 @@ 
 #define CPM_SPI_BASE_TX	(CPM_SPI_BASE + sizeof(cbd_t))
 
 #define MAX_BUFFER	0x8000 /* Max possible is 0xffff. We want power of 2 */
+#define MIN_HWORD_XFER	64	/* Minimum size for 16 bits transfer */
 
 struct mpc8xx_priv {
 	spi_t __iomem *spi;
@@ -149,23 +151,46 @@  static int mpc8xx_spi_xfer_one(struct udevice *dev, size_t count,
 	immap_t __iomem *immr = (immap_t __iomem *)CONFIG_SYS_IMMR;
 	cpm8xx_t __iomem *cp = &immr->im_cpm;
 	cbd_t __iomem *tbdf, *rbdf;
+	void *bufout, *bufin;
+	u16 spmode_len;
 	int tm;
 
 	tbdf = (cbd_t __iomem *)&cp->cp_dpmem[CPM_SPI_BASE_TX];
 	rbdf = (cbd_t __iomem *)&cp->cp_dpmem[CPM_SPI_BASE_RX];
 
+	if (!(count & 1) && count >= MIN_HWORD_XFER) {
+		spmode_len = SPMODE_LEN(16);
+		if (dout) {
+			int i;
+
+			bufout = malloc(count);
+			for (i = 0; i < count; i += 2)
+				*(u16 *)(bufout + i) = swab16(*(u16 *)(dout + i));
+		} else {
+			bufout = NULL;
+		}
+		if (din)
+			bufin = malloc(count);
+		else
+			bufin = NULL;
+	} else {
+		spmode_len = SPMODE_LEN(8);
+		bufout = (void *)dout;
+		bufin = din;
+	}
+
 	/* Setting tx bd status and data length */
-	out_be32(&tbdf->cbd_bufaddr, dout ? (ulong)dout : (ulong)dummy_buffer);
+	out_be32(&tbdf->cbd_bufaddr, bufout ? (ulong)bufout : (ulong)dummy_buffer);
 	out_be16(&tbdf->cbd_sc, BD_SC_READY | BD_SC_LAST | BD_SC_WRAP);
 	out_be16(&tbdf->cbd_datlen, count);
 
 	/* Setting rx bd status and data length */
-	out_be32(&rbdf->cbd_bufaddr, din ? (ulong)din : (ulong)dummy_buffer);
+	out_be32(&rbdf->cbd_bufaddr, bufin ? (ulong)bufin : (ulong)dummy_buffer);
 	out_be16(&rbdf->cbd_sc, BD_SC_EMPTY | BD_SC_WRAP);
 	out_be16(&rbdf->cbd_datlen, 0);	 /* rx length has no significance */
 
 	clrsetbits_be16(&cp->cp_spmode, ~SPMODE_LOOP, SPMODE_REV | SPMODE_MSTR |
-			SPMODE_EN | SPMODE_LEN(8) | SPMODE_PM(0x8));
+			SPMODE_EN | spmode_len | SPMODE_PM(0x8));
 	out_8(&cp->cp_spim, 0);		/* Mask  all SPI events */
 	out_8(&cp->cp_spie, SPI_EMASK);	/* Clear all SPI events	*/
 
@@ -188,6 +213,19 @@  static int mpc8xx_spi_xfer_one(struct udevice *dev, size_t count,
 	if (tm >= 1000)
 		return -ETIMEDOUT;
 
+	if (!(count & 1) && count > MIN_HWORD_XFER) {
+		if (dout)
+			free(bufout);
+		if (din) {
+			int i;
+
+			bufout = malloc(count);
+			for (i = 0; i < count; i += 2)
+				*(u16 *)(din + i) = swab16(*(u16 *)(bufin + i));
+			free(bufin);
+		}
+	}
+
 	return 0;
 }