diff mbox

[U-Boot,V4] net: fec_mxc: allow use with cache enabled

Message ID 1331740746-7412-1-git-send-email-eric.nelson@boundarydevices.com
State Changes Requested
Headers show

Commit Message

Eric Nelson March 14, 2012, 3:59 p.m. UTC
ensure that transmit and receive buffers are cache-line aligned
        invalidate cache for each packet as received
	update receive buffer descriptors one cache line at a time
        flush cache before transmitting

Original patch by Marek:
	http://lists.denx.de/pipermail/u-boot/2012-February/117695.html

Signed-off-by: Eric Nelson <eric.nelson at boundarydevices.com>
---
V2 addresses some concerns from the ML:
	- Use readl()/writel() instead of mapped data structure
	  accesses
	- Wrong comment style
	- &rbd_base[0] == rbd_base
	removed 'volatile' from fec_send().

V3 updates from ML (and Marek):
	consolidated CONFIG_FEC_DATA_ALIGNMENT and CONFIG_FEC_DESC_ALIGNMENT
	added cache flushes after initialization of TBD/RBD

V4 updates from ML
	http://lists.denx.de/pipermail/u-boot/2012-March#120139
	remove tabs after #define/#if/#error
	replace CONFIG_FEC_ALIGN with ARCH_DMA_MINALIGN	

 drivers/net/fec_mxc.c |  277 ++++++++++++++++++++++++++++++++++---------------
 drivers/net/fec_mxc.h |   19 +---
 2 files changed, 192 insertions(+), 104 deletions(-)

Comments

Marek Vasut March 25, 2012, 11:57 p.m. UTC | #1
Dear Eric Nelson,

> 	ensure that transmit and receive buffers are cache-line aligned
>         invalidate cache for each packet as received
> 	update receive buffer descriptors one cache line at a time
>         flush cache before transmitting
> 
> Original patch by Marek:
> 	http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
> 
> Signed-off-by: Eric Nelson <eric.nelson at boundarydevices.com>
> ---
> V2 addresses some concerns from the ML:
> 	- Use readl()/writel() instead of mapped data structure
> 	  accesses
> 	- Wrong comment style
> 	- &rbd_base[0] == rbd_base
> 	removed 'volatile' from fec_send().
> 
> V3 updates from ML (and Marek):
> 	consolidated CONFIG_FEC_DATA_ALIGNMENT and CONFIG_FEC_DESC_ALIGNMENT
> 	added cache flushes after initialization of TBD/RBD
> 
> V4 updates from ML
> 	http://lists.denx.de/pipermail/u-boot/2012-March#120139
> 	remove tabs after #define/#if/#error
> 	replace CONFIG_FEC_ALIGN with ARCH_DMA_MINALIGN
> 

Acked-by: Marek Vasut <marex@denx.de>

Didn't I ack some previous version? Maybe I even added tested-by to some 
previous version ;-)

Best regards,
Marek Vasut
Eric Nelson March 26, 2012, 3 a.m. UTC | #2
On 03/25/2012 04:57 PM, Marek Vasut wrote:
> Dear Eric Nelson,
>
>> 	ensure that transmit and receive buffers are cache-line aligned
>>          invalidate cache for each packet as received
>> 	update receive buffer descriptors one cache line at a time
>>          flush cache before transmitting
>>
>> Original patch by Marek:
>> 	http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
>>
>> Signed-off-by: Eric Nelson<eric.nelson at boundarydevices.com>
>> ---
>>
 >> <snip>
>>
>> V4 updates from ML
>> 	http://lists.denx.de/pipermail/u-boot/2012-March#120139
>> 	remove tabs after #define/#if/#error
>> 	replace CONFIG_FEC_ALIGN with ARCH_DMA_MINALIGN
>>
>
> Acked-by: Marek Vasut<marex@denx.de>
>
> Didn't I ack some previous version? Maybe I even added tested-by to some
> previous version ;-)
>
Thanks Marek,

I couldn't find an official ack of the whole thing, though most of the code
structure came from you and were acked in parts.

I'm just trying to push it into the end zone so we can get completely
cache-enabled on i.MX6.

Regards,


Eric
diff mbox

Patch

diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c
index 1fdd071..9e71118 100644
--- a/drivers/net/fec_mxc.c
+++ b/drivers/net/fec_mxc.c
@@ -38,16 +38,28 @@  DECLARE_GLOBAL_DATA_PTR;
 #error "CONFIG_MII has to be defined!"
 #endif
 
-#ifndef	CONFIG_FEC_XCV_TYPE
-#define	CONFIG_FEC_XCV_TYPE	MII100
+#ifndef CONFIG_FEC_XCV_TYPE
+#define CONFIG_FEC_XCV_TYPE MII100
 #endif
 
 /*
  * The i.MX28 operates with packets in big endian. We need to swap them before
  * sending and after receiving.
  */
-#ifdef	CONFIG_MX28
-#define	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_MX28
+#define CONFIG_FEC_MXC_SWAP_PACKET
+#endif
+
+#define RXDESC_PER_CACHELINE (ARCH_DMA_MINALIGN/sizeof(struct fec_bd))
+
+/* Check various alignment issues at compile time */
+#if ((ARCH_DMA_MINALIGN < 16) || (ARCH_DMA_MINALIGN % 16 != 0))
+#error "ARCH_DMA_MINALIGN must be multiple of 16!"
+#endif
+
+#if ((PKTALIGN < ARCH_DMA_MINALIGN) || \
+	(PKTALIGN % ARCH_DMA_MINALIGN != 0))
+#error "PKTALIGN must be multiple of ARCH_DMA_MINALIGN!"
 #endif
 
 #undef DEBUG
@@ -59,7 +71,7 @@  struct nbuf {
 	uint8_t head[16];	/**< MAC header(6 + 6 + 2) + 2(aligned) */
 };
 
-#ifdef	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_FEC_MXC_SWAP_PACKET
 static void swap_packet(uint32_t *packet, int length)
 {
 	int i;
@@ -259,43 +271,52 @@  static int fec_tx_task_disable(struct fec_priv *fec)
  * Initialize receive task's buffer descriptors
  * @param[in] fec all we know about the device yet
  * @param[in] count receive buffer count to be allocated
- * @param[in] size size of each receive buffer
+ * @param[in] dsize desired size of each receive buffer
  * @return 0 on success
  *
  * For this task we need additional memory for the data buffers. And each
  * data buffer requires some alignment. Thy must be aligned to a specific
- * boundary each (DB_DATA_ALIGNMENT).
+ * boundary each.
  */
-static int fec_rbd_init(struct fec_priv *fec, int count, int size)
+static int fec_rbd_init(struct fec_priv *fec, int count, int dsize)
 {
-	int ix;
-	uint32_t p = 0;
-
-	/* reserve data memory and consider alignment */
-	if (fec->rdb_ptr == NULL)
-		fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT);
-	p = (uint32_t)fec->rdb_ptr;
-	if (!p) {
-		puts("fec_mxc: not enough malloc memory\n");
-		return -ENOMEM;
-	}
-	memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT);
-	p += DB_DATA_ALIGNMENT-1;
-	p &= ~(DB_DATA_ALIGNMENT-1);
-
-	for (ix = 0; ix < count; ix++) {
-		writel(p, &fec->rbd_base[ix].data_pointer);
-		p += size;
-		writew(FEC_RBD_EMPTY, &fec->rbd_base[ix].status);
-		writew(0, &fec->rbd_base[ix].data_length);
-	}
+	uint32_t size;
+	int i;
+
 	/*
-	 * mark the last RBD to close the ring
+	 * Allocate memory for the buffers. This allocation respects the
+	 * alignment
 	 */
-	writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[ix - 1].status);
+	size = roundup(dsize, ARCH_DMA_MINALIGN);
+	for (i = 0; i < count; i++) {
+		uint32_t data_ptr = readl(&fec->rbd_base[i].data_pointer);
+		if (data_ptr == 0) {
+			uint8_t *data = memalign(ARCH_DMA_MINALIGN,
+						 size);
+			if (!data) {
+				printf("%s: error allocating rxbuf %d\n",
+				       __func__, i);
+				goto err;
+			}
+			writel((uint32_t)data, &fec->rbd_base[i].data_pointer);
+		} /* needs allocation */
+		writew(FEC_RBD_EMPTY, &fec->rbd_base[i].status);
+		writew(0, &fec->rbd_base[i].data_length);
+	}
+
+	/* Mark the last RBD to close the ring. */
+	writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[i - 1].status);
 	fec->rbd_index = 0;
 
 	return 0;
+
+err:
+	for (; i >= 0; i--) {
+		uint32_t data_ptr = readl(&fec->rbd_base[i].data_pointer);
+		free((void *)data_ptr);
+	}
+
+	return -ENOMEM;
 }
 
 /**
@@ -312,9 +333,13 @@  static int fec_rbd_init(struct fec_priv *fec, int count, int size)
  */
 static void fec_tbd_init(struct fec_priv *fec)
 {
+	unsigned addr = (unsigned)fec->tbd_base;
+	unsigned size = roundup(2 * sizeof(struct fec_bd),
+				ARCH_DMA_MINALIGN);
 	writew(0x0000, &fec->tbd_base[0].status);
 	writew(FEC_TBD_WRAP, &fec->tbd_base[1].status);
 	fec->tbd_index = 0;
+	flush_dcache_range(addr, addr+size);
 }
 
 /**
@@ -324,16 +349,10 @@  static void fec_tbd_init(struct fec_priv *fec)
  */
 static void fec_rbd_clean(int last, struct fec_bd *pRbd)
 {
-	/*
-	 * Reset buffer descriptor as empty
-	 */
+	unsigned short flags = FEC_RBD_EMPTY;
 	if (last)
-		writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &pRbd->status);
-	else
-		writew(FEC_RBD_EMPTY, &pRbd->status);
-	/*
-	 * no data in it
-	 */
+		flags |= FEC_RBD_WRAP;
+	writew(flags, &pRbd->status);
 	writew(0, &pRbd->data_length);
 }
 
@@ -387,12 +406,25 @@  static int fec_open(struct eth_device *edev)
 {
 	struct fec_priv *fec = (struct fec_priv *)edev->priv;
 	int speed;
+	uint32_t addr, size;
+	int i;
 
 	debug("fec_open: fec_open(dev)\n");
 	/* full-duplex, heartbeat disabled */
 	writel(1 << 2, &fec->eth->x_cntrl);
 	fec->rbd_index = 0;
 
+	/* Invalidate all descriptors */
+	for (i = 0; i < FEC_RBD_NUM - 1; i++)
+		fec_rbd_clean(0, &fec->rbd_base[i]);
+	fec_rbd_clean(1, &fec->rbd_base[i]);
+
+	/* Flush the descriptors into RAM */
+	size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+			ARCH_DMA_MINALIGN);
+	addr = (uint32_t)fec->rbd_base;
+	flush_dcache_range(addr, addr + size);
+
 #ifdef FEC_QUIRK_ENET_MAC
 	/* Enable ENET HW endian SWAP */
 	writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP,
@@ -478,38 +510,55 @@  static int fec_open(struct eth_device *edev)
 
 static int fec_init(struct eth_device *dev, bd_t* bd)
 {
-	uint32_t base;
 	struct fec_priv *fec = (struct fec_priv *)dev->priv;
 	uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop;
 	uint32_t rcntrl;
-	int i;
+	uint32_t size;
+	int i, ret;
 
 	/* Initialize MAC address */
 	fec_set_hwaddr(dev);
 
 	/*
-	 * reserve memory for both buffer descriptor chains at once
-	 * Datasheet forces the startaddress of each chain is 16 byte
-	 * aligned
+	 * Allocate transmit descriptors, there are two in total. This
+	 * allocation respects cache alignment.
 	 */
-	if (fec->base_ptr == NULL)
-		fec->base_ptr = malloc((2 + FEC_RBD_NUM) *
-				sizeof(struct fec_bd) + DB_ALIGNMENT);
-	base = (uint32_t)fec->base_ptr;
-	if (!base) {
-		puts("fec_mxc: not enough malloc memory\n");
-		return -ENOMEM;
+	if (!fec->tbd_base) {
+		size = roundup(2 * sizeof(struct fec_bd),
+				ARCH_DMA_MINALIGN);
+		fec->tbd_base = memalign(ARCH_DMA_MINALIGN, size);
+		if (!fec->tbd_base) {
+			ret = -ENOMEM;
+			goto err1;
+		}
+		memset(fec->tbd_base, 0, size);
+		fec_tbd_init(fec);
+		flush_dcache_range((unsigned)fec->tbd_base, size);
 	}
-	memset((void *)base, 0, (2 + FEC_RBD_NUM) *
-			sizeof(struct fec_bd) + DB_ALIGNMENT);
-	base += (DB_ALIGNMENT-1);
-	base &= ~(DB_ALIGNMENT-1);
-
-	fec->rbd_base = (struct fec_bd *)base;
 
-	base += FEC_RBD_NUM * sizeof(struct fec_bd);
-
-	fec->tbd_base = (struct fec_bd *)base;
+	/*
+	 * Allocate receive descriptors. This allocation respects cache
+	 * alignment.
+	 */
+	if (!fec->rbd_base) {
+		size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+				ARCH_DMA_MINALIGN);
+		fec->rbd_base = memalign(ARCH_DMA_MINALIGN, size);
+		if (!fec->rbd_base) {
+			ret = -ENOMEM;
+			goto err2;
+		}
+		memset(fec->rbd_base, 0, size);
+		/*
+		 * Initialize RxBD ring
+		 */
+		if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) {
+			ret = -ENOMEM;
+			goto err3;
+		}
+		flush_dcache_range((unsigned)fec->rbd_base,
+				   (unsigned)fec->rbd_base + size);
+	}
 
 	/*
 	 * Set interrupt mask register
@@ -566,23 +615,19 @@  static int fec_init(struct eth_device *dev, bd_t* bd)
 	writel((uint32_t)fec->tbd_base, &fec->eth->etdsr);
 	writel((uint32_t)fec->rbd_base, &fec->eth->erdsr);
 
-	/*
-	 * Initialize RxBD/TxBD rings
-	 */
-	if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) {
-		free(fec->base_ptr);
-		fec->base_ptr = NULL;
-		return -ENOMEM;
-	}
-	fec_tbd_init(fec);
-
-
 #ifndef CONFIG_PHYLIB
 	if (fec->xcv_type != SEVENWIRE)
 		miiphy_restart_aneg(dev);
 #endif
 	fec_open(dev);
 	return 0;
+
+err3:
+	free(fec->rbd_base);
+err2:
+	free(fec->tbd_base);
+err1:
+	return ret;
 }
 
 /**
@@ -631,9 +676,11 @@  static void fec_halt(struct eth_device *dev)
  * @param[in] length Data count in bytes
  * @return 0 on success
  */
-static int fec_send(struct eth_device *dev, volatile void* packet, int length)
+static int fec_send(struct eth_device *dev, void *packet, int length)
 {
 	unsigned int status;
+	uint32_t size;
+	uint32_t addr;
 
 	/*
 	 * This routine transmits one frame.  This routine only accepts
@@ -650,15 +697,21 @@  static int fec_send(struct eth_device *dev, volatile void* packet, int length)
 	}
 
 	/*
-	 * Setup the transmit buffer
-	 * Note: We are always using the first buffer for transmission,
-	 * the second will be empty and only used to stop the DMA engine
+	 * Setup the transmit buffer. We are always using the first buffer for
+	 * transmission, the second will be empty and only used to stop the DMA
+	 * engine. We also flush the packet to RAM here to avoid cache trouble.
 	 */
-#ifdef	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_FEC_MXC_SWAP_PACKET
 	swap_packet((uint32_t *)packet, length);
 #endif
+
+	addr = (uint32_t)packet;
+	size = roundup(length, ARCH_DMA_MINALIGN);
+	flush_dcache_range(addr, addr + size);
+
 	writew(length, &fec->tbd_base[fec->tbd_index].data_length);
-	writel((uint32_t)packet, &fec->tbd_base[fec->tbd_index].data_pointer);
+	writel(addr, &fec->tbd_base[fec->tbd_index].data_pointer);
+
 	/*
 	 * update BD's status now
 	 * This block:
@@ -672,16 +725,30 @@  static int fec_send(struct eth_device *dev, volatile void* packet, int length)
 	writew(status, &fec->tbd_base[fec->tbd_index].status);
 
 	/*
+	 * Flush data cache. This code flushes both TX descriptors to RAM.
+	 * After this code, the descriptors will be safely in RAM and we
+	 * can start DMA.
+	 */
+	size = roundup(2 * sizeof(struct fec_bd), ARCH_DMA_MINALIGN);
+	addr = (uint32_t)fec->tbd_base;
+	flush_dcache_range(addr, addr + size);
+
+	/*
 	 * Enable SmartDMA transmit task
 	 */
 	fec_tx_task_enable(fec);
 
 	/*
-	 * wait until frame is sent .
+	 * Wait until frame is sent. On each turn of the wait cycle, we must
+	 * invalidate data cache to see what's really in RAM. Also, we need
+	 * barrier here.
 	 */
+	invalidate_dcache_range(addr, addr + size);
 	while (readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_READY) {
 		udelay(1);
+		invalidate_dcache_range(addr, addr + size);
 	}
+
 	debug("fec_send: status 0x%x index %d\n",
 			readw(&fec->tbd_base[fec->tbd_index].status),
 			fec->tbd_index);
@@ -707,6 +774,8 @@  static int fec_recv(struct eth_device *dev)
 	int frame_length, len = 0;
 	struct nbuf *frame;
 	uint16_t bd_status;
+	uint32_t addr, size;
+	int i;
 	uchar buff[FEC_MAX_PKT_SIZE];
 
 	/*
@@ -737,8 +806,23 @@  static int fec_recv(struct eth_device *dev)
 	}
 
 	/*
-	 * ensure reading the right buffer status
+	 * Read the buffer status. Before the status can be read, the data cache
+	 * must be invalidated, because the data in RAM might have been changed
+	 * by DMA. The descriptors are properly aligned to cachelines so there's
+	 * no need to worry they'd overlap.
+	 *
+	 * WARNING: By invalidating the descriptor here, we also invalidate
+	 * the descriptors surrounding this one. Therefore we can NOT change the
+	 * contents of this descriptor nor the surrounding ones. The problem is
+	 * that in order to mark the descriptor as processed, we need to change
+	 * the descriptor. The solution is to mark the whole cache line when all
+	 * descriptors in the cache line are processed.
 	 */
+	addr = (uint32_t)rbd;
+	addr &= ~(ARCH_DMA_MINALIGN - 1);
+	size = roundup(sizeof(struct fec_bd), ARCH_DMA_MINALIGN);
+	invalidate_dcache_range(addr, addr + size);
+
 	bd_status = readw(&rbd->status);
 	debug("fec_recv: status 0x%x\n", bd_status);
 
@@ -751,9 +835,16 @@  static int fec_recv(struct eth_device *dev)
 			frame = (struct nbuf *)readl(&rbd->data_pointer);
 			frame_length = readw(&rbd->data_length) - 4;
 			/*
+			 * Invalidate data cache over the buffer
+			 */
+			addr = (uint32_t)frame;
+			size = roundup(frame_length, ARCH_DMA_MINALIGN);
+			invalidate_dcache_range(addr, addr + size);
+
+			/*
 			 *  Fill the buffer and pass it to upper layers
 			 */
-#ifdef	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_FEC_MXC_SWAP_PACKET
 			swap_packet((uint32_t *)frame->data, frame_length);
 #endif
 			memcpy(buff, frame->data, frame_length);
@@ -765,11 +856,25 @@  static int fec_recv(struct eth_device *dev)
 						(ulong)rbd->data_pointer,
 						bd_status);
 		}
+
 		/*
-		 * free the current buffer, restart the engine
-		 * and move forward to the next buffer
+		 * Free the current buffer, restart the engine and move forward
+		 * to the next buffer. Here we check if the whole cacheline of
+		 * descriptors was already processed and if so, we mark it free
+		 * as whole.
 		 */
-		fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd);
+		size = RXDESC_PER_CACHELINE - 1;
+		if ((fec->rbd_index & size) == size) {
+			i = fec->rbd_index - size;
+			addr = (uint32_t)&fec->rbd_base[i];
+			for (; i <= fec->rbd_index ; i++) {
+				fec_rbd_clean(i == (FEC_RBD_NUM - 1),
+					      &fec->rbd_base[i]);
+			}
+			flush_dcache_range(addr,
+				addr + ARCH_DMA_MINALIGN);
+		}
+
 		fec_rx_task_enable(fec);
 		fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM;
 	}
@@ -866,7 +971,7 @@  static int fec_probe(bd_t *bd, int dev_id, int phy_id, uint32_t base_addr)
 	bus->read = fec_phy_read;
 	bus->write = fec_phy_write;
 	sprintf(bus->name, edev->name);
-#ifdef	CONFIG_MX28
+#ifdef CONFIG_MX28
 	/*
 	 * The i.MX28 has two ethernet interfaces, but they are not equal.
 	 * Only the first one can access the MDIO bus.
@@ -901,7 +1006,7 @@  err1:
 	return ret;
 }
 
-#ifndef	CONFIG_FEC_MXC_MULTI
+#ifndef CONFIG_FEC_MXC_MULTI
 int fecmxc_initialize(bd_t *bd)
 {
 	int lout = 1;
diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h
index 2eb7803..852b2e0 100644
--- a/drivers/net/fec_mxc.h
+++ b/drivers/net/fec_mxc.h
@@ -234,22 +234,6 @@  struct ethernet_regs {
 #endif
 
 /**
- * @brief Descriptor buffer alignment
- *
- * i.MX27 requires a 16 byte alignment (but for the first element only)
- */
-#define DB_ALIGNMENT		16
-
-/**
- * @brief Data buffer alignment
- *
- * i.MX27 requires a four byte alignment for transmit and 16 bits
- * alignment for receive so take 16
- * Note: Valid for member data_pointer in struct buffer_descriptor
- */
-#define DB_DATA_ALIGNMENT	16
-
-/**
  * @brief Receive & Transmit Buffer Descriptor definitions
  *
  * Note: The first BD must be aligned (see DB_ALIGNMENT)
@@ -282,8 +266,7 @@  struct fec_priv {
 	struct fec_bd *tbd_base;	/* TBD ring */
 	int tbd_index;			/* next transmit BD to write */
 	bd_t *bd;
-	void *rdb_ptr;
-	void *base_ptr;
+	uint8_t *tdb_ptr;
 	int dev_id;
 	int phy_id;
 	struct mii_dev *bus;