Patchwork [U-Boot] i.MX5/6 U-Boot: Cache enabling

login
register
mail settings
Submitter Marek Vasut
Date Feb. 9, 2012, 7:06 a.m.
Message ID <201202090806.51520.marek.vasut@gmail.com>
Download mbox | patch
Permalink /patch/140320/
State Superseded
Headers show

Comments

Marek Vasut - Feb. 9, 2012, 7:06 a.m.
> On 04.02.2012 11:18, Marek Vasut wrote:
> >> Let's discuss how to enable the i.MX5/6 caches in U-Boot:
> >> 
> >> On 03.02.2012 12:00, Stefano Babic wrote:
> >>> On 03/02/2012 11:18, Dirk Behme wrote:
> >> ...
> >> 
> >>>>> As your concerns are surely related to speed up the boot process,
> >>>>> IMHO we can focus efforts to add cache support for MX5 / MX6.
> >>>> 
> >>>> Ok, sounds good. Any idea what has to be done for this? Or what would
> >>>> be the steps for this?
> >>> 
> >>> As armv7 architecture, the MX can profit of the work already done for
> >>> other SOCs. Functions for enabling / disabling / invalidate caches are
> >>> already provided, in arch/arm/lib and arch/arm/cpu/armv7/cache_v7.c. So
> >>> at least for MX5/MX6.
> >>> 
> >>> But we should change MXC drivers to be cache-aware. At least the FEC
> >>> driver and MMC driver are known to not work when dcache is on.
> >> 
> >> Marek, Troy, Fabio: What do you think is needed to make the i.MX5/6
> >> FEC driver cache-aware?
> > 
> > I already have a partly finished implementation of FEC ethernet with
> > cache support somewhere on my drive.
> 
> Do you like to share this?
> 
> Many thanks and best regards

Try the attached stuff, it's likely crap and needs rebasing. It might give you 
some pointers as to how to handle this.

> 
> Dirk

Patch

From b77e8f6347b456883f27dc2323fbac0453e16110 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Fri, 19 Aug 2011 23:16:03 +0200
Subject: [PATCH 45/52] FEC: Fix to work with data cache enabled

This patch fixes FEC ethernet driver to work with data caches.

Changes done:
- Buffers are now allocated aligned
- Descriptors are marked as "free" only when whole cacheline containing them is
  "free" in fec_recv().
- Introduction of CONFIG_FEC_DESC_ALIGNMENT, which specifies on what boundary
  the descriptors shall be aligned. This allows aligning them on cacheline
  boundary on architectures where the cacheline isn't 16 bytes wide (ARMv5 has
  32 bytes long cacheline).
- Introduction of CONFIG_FEC_DATA_ALIGNMENT, ditto (see above), for data buffers.

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/net/fec_mxc.c |  255 +++++++++++++++++++++++++++++++++++--------------
 drivers/net/fec_mxc.h |   19 +----
 2 files changed, 185 insertions(+), 89 deletions(-)

diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c
index e67b01f..aec6e89 100644
--- a/drivers/net/fec_mxc.c
+++ b/drivers/net/fec_mxc.c
@@ -50,6 +50,33 @@  DECLARE_GLOBAL_DATA_PTR;
 #define	CONFIG_FEC_MXC_SWAP_PACKET
 #endif
 
+#ifndef	CONFIG_FEC_DESC_ALIGNMENT
+#define	CONFIG_FEC_DESC_ALIGNMENT	16
+#endif
+
+#ifndef	CONFIG_FEC_DATA_ALIGNMENT
+#define	CONFIG_FEC_DATA_ALIGNMENT	16
+#endif
+
+/* Check various alignment issues at compile time */
+#if ((CONFIG_FEC_DESC_ALIGNMENT < 16) || (CONFIG_FEC_DESC_ALIGNMENT % 16 != 0))
+#error	"CONFIG_FEC_DESC_ALIGNMENT must be multiple of 16!"
+#endif
+
+#if ((CONFIG_FEC_DATA_ALIGNMENT < 16) || (CONFIG_FEC_DATA_ALIGNMENT % 16 != 0))
+#error	"CONFIG_FEC_DATA_ALIGNMENT must be multiple of 16!"
+#endif
+
+#if ((PKTALIGN < CONFIG_FEC_DATA_ALIGNMENT) || \
+	(PKTALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
+#error	"PKTALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
+#endif
+
+#if ((PKTSIZE_ALIGN < CONFIG_FEC_DATA_ALIGNMENT) || \
+	(PKTSIZE_ALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
+#error	"PKTSIZE_ALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
+#endif
+
 #undef DEBUG
 
 struct nbuf {
@@ -267,43 +294,45 @@  static int fec_tx_task_disable(struct fec_priv *fec)
  * Initialize receive task's buffer descriptors
  * @param[in] fec all we know about the device yet
  * @param[in] count receive buffer count to be allocated
- * @param[in] size size of each receive buffer
+ * @param[in] dsize desired size of each receive buffer
  * @return 0 on success
  *
  * For this task we need additional memory for the data buffers. And each
 * data buffer requires some alignment. They must be aligned to a specific
- * boundary each (DB_DATA_ALIGNMENT).
+ * boundary each.
  */
-static int fec_rbd_init(struct fec_priv *fec, int count, int size)
+static int fec_rbd_init(struct fec_priv *fec, int count, int dsize)
 {
-	int ix;
-	uint32_t p = 0;
-
-	/* reserve data memory and consider alignment */
-	if (fec->rdb_ptr == NULL)
-		fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT);
-	p = (uint32_t)fec->rdb_ptr;
-	if (!p) {
-		puts("fec_mxc: not enough malloc memory\n");
-		return -ENOMEM;
-	}
-	memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT);
-	p += DB_DATA_ALIGNMENT-1;
-	p &= ~(DB_DATA_ALIGNMENT-1);
-
-	for (ix = 0; ix < count; ix++) {
-		writel(p, &fec->rbd_base[ix].data_pointer);
-		p += size;
-		writew(FEC_RBD_EMPTY, &fec->rbd_base[ix].status);
-		writew(0, &fec->rbd_base[ix].data_length);
-	}
+	uint32_t size;
+	int i;
+	uint8_t *data;
+
 	/*
-	 * mark the last RBD to close the ring
+	 * Allocate memory for the buffers. This allocation respects the
+	 * alignment
 	 */
-	writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[ix - 1].status);
+	size = roundup(dsize, CONFIG_FEC_DATA_ALIGNMENT);
+	for (i = 0; i < count; i++) {
+		data = memalign(CONFIG_FEC_DATA_ALIGNMENT, size);
+		if (!data)
+			goto err;
+
+		fec->rbd_base[i].data_pointer = (uint32_t)data;
+		fec->rbd_base[i].status = FEC_RBD_EMPTY;
+		fec->rbd_base[i].data_length = 0;
+	}
+
+	/* Mark the last RBD to close the ring. */
+	fec->rbd_base[i - 1].status = FEC_RBD_EMPTY | FEC_RBD_WRAP;
 	fec->rbd_index = 0;
 
 	return 0;
+
+err:
+	for (--i; i >= 0; i--)
+		free((uint8_t *)fec->rbd_base[i].data_pointer);
+
+	return -ENOMEM;
 }
 
 /**
@@ -320,8 +349,8 @@  static int fec_rbd_init(struct fec_priv *fec, int count, int size)
  */
 static void fec_tbd_init(struct fec_priv *fec)
 {
-	writew(0x0000, &fec->tbd_base[0].status);
-	writew(FEC_TBD_WRAP, &fec->tbd_base[1].status);
+	fec->tbd_base[0].status = 0;
+	fec->tbd_base[1].status = FEC_TBD_WRAP;
 	fec->tbd_index = 0;
 }
 
@@ -378,12 +407,25 @@  static int fec_set_hwaddr(struct eth_device *dev)
 static int fec_open(struct eth_device *edev)
 {
 	struct fec_priv *fec = (struct fec_priv *)edev->priv;
+	uint32_t addr, size;
+	int i;
 
 	debug("fec_open: fec_open(dev)\n");
 	/* full-duplex, heartbeat disabled */
 	writel(1 << 2, &fec->eth->x_cntrl);
 	fec->rbd_index = 0;
 
+	/* Invalidate all descriptors */
+	for (i = 0; i < FEC_RBD_NUM - 1; i++)
+		fec_rbd_clean(0, &fec->rbd_base[i]);
+	fec_rbd_clean(1, &fec->rbd_base[i]);
+
+	/* Flush the descriptors into RAM */
+	size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+			CONFIG_FEC_DATA_ALIGNMENT);
+	addr = (uint32_t)&fec->rbd_base[0];
+	flush_dcache_range(addr, addr + size);
+
 	/*
 	 * Enable FEC-Lite controller
 	 */
@@ -433,38 +475,44 @@  static int fec_open(struct eth_device *edev)
 
 static int fec_init(struct eth_device *dev, bd_t* bd)
 {
-	uint32_t base;
 	struct fec_priv *fec = (struct fec_priv *)dev->priv;
 	uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop;
 	uint32_t rcntrl;
-	int i;
+	uint32_t size;
+	int i, ret;
 
 	/* Initialize MAC address */
 	fec_set_hwaddr(dev);
 
 	/*
-	 * reserve memory for both buffer descriptor chains at once
-	 * Datasheet forces the startaddress of each chain is 16 byte
-	 * aligned
+	 * Allocate transmit descriptors, there are two in total. This
+	 * allocation respects cache alignment.
 	 */
-	if (fec->base_ptr == NULL)
-		fec->base_ptr = malloc((2 + FEC_RBD_NUM) *
-				sizeof(struct fec_bd) + DB_ALIGNMENT);
-	base = (uint32_t)fec->base_ptr;
-	if (!base) {
-		puts("fec_mxc: not enough malloc memory\n");
-		return -ENOMEM;
+	if (!fec->tbd_base) {
+		size = roundup(2 * sizeof(struct fec_bd),
+				CONFIG_FEC_DATA_ALIGNMENT);
+		fec->tbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
+		if (!fec->tbd_base) {
+			ret = -ENOMEM;
+			goto err1;
+		}
+		memset(fec->tbd_base, 0, size);
 	}
-	memset((void *)base, 0, (2 + FEC_RBD_NUM) *
-			sizeof(struct fec_bd) + DB_ALIGNMENT);
-	base += (DB_ALIGNMENT-1);
-	base &= ~(DB_ALIGNMENT-1);
 
-	fec->rbd_base = (struct fec_bd *)base;
-
-	base += FEC_RBD_NUM * sizeof(struct fec_bd);
-
-	fec->tbd_base = (struct fec_bd *)base;
+	/*
+	 * Allocate receive descriptors. This allocation respects cache
+	 * alignment.
+	 */
+	if (!fec->rbd_base) {
+		size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+				CONFIG_FEC_DATA_ALIGNMENT);
+		fec->rbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
+		if (!fec->rbd_base) {
+			ret = -ENOMEM;
+			goto err2;
+		}
+		memset(fec->rbd_base, 0, size);
+	}
 
 	/*
 	 * Set interrupt mask register
@@ -519,22 +567,25 @@  static int fec_init(struct eth_device *dev, bd_t* bd)
 	writel((uint32_t)fec->tbd_base, &fec->eth->etdsr);
 	writel((uint32_t)fec->rbd_base, &fec->eth->erdsr);
 
-	/*
-	 * Initialize RxBD/TxBD rings
-	 */
-	if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) {
-		free(fec->base_ptr);
-		fec->base_ptr = NULL;
-		return -ENOMEM;
-	}
-	fec_tbd_init(fec);
+	/* Initialize RxBD ring buffer */
+	if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0)
+		goto err3;
 
+	/* Initialize TxBD ring buffer */
+	fec_tbd_init(fec);
 
 	if (fec->xcv_type != SEVENWIRE)
 		miiphy_restart_aneg(dev);
 
 	fec_open(dev);
 	return 0;
+
+err3:
+	free(fec->rbd_base);
+err2:
+	free(fec->tbd_base);
+err1:
+	return ret;
 }
 
 /**
@@ -583,9 +634,11 @@  static void fec_halt(struct eth_device *dev)
  * @param[in] length Data count in bytes
  * @return 0 on success
  */
-static int fec_send(struct eth_device *dev, volatile void* packet, int length)
+static int fec_send(struct eth_device *dev, volatile void *packet, int length)
 {
 	unsigned int status;
+	uint32_t size;
+	uint32_t addr;
 
 	/*
 	 * This routine transmits one frame.  This routine only accepts
@@ -602,15 +655,21 @@  static int fec_send(struct eth_device *dev, volatile void* packet, int length)
 	}
 
 	/*
-	 * Setup the transmit buffer
-	 * Note: We are always using the first buffer for transmission,
-	 * the second will be empty and only used to stop the DMA engine
+	 * Setup the transmit buffer. We are always using the first buffer for
+	 * transmission, the second will be empty and only used to stop the DMA
+	 * engine. We also flush the packet to RAM here to avoid cache trouble.
 	 */
 #ifdef	CONFIG_FEC_MXC_SWAP_PACKET
 	swap_packet((uint32_t *)packet, length);
 #endif
-	writew(length, &fec->tbd_base[fec->tbd_index].data_length);
-	writel((uint32_t)packet, &fec->tbd_base[fec->tbd_index].data_pointer);
+
+	addr = (uint32_t)packet;
+	size = roundup(length, CONFIG_FEC_DATA_ALIGNMENT);
+	flush_dcache_range(addr, addr + size);
+
+	fec->tbd_base[fec->tbd_index].data_length = length;
+	fec->tbd_base[fec->tbd_index].data_pointer = addr;
+
 	/*
 	 * update BD's status now
 	 * This block:
@@ -619,9 +678,18 @@  static int fec_send(struct eth_device *dev, volatile void* packet, int length)
 	 * - might be the last BD in the list, so the address counter should
 	 *   wrap (-> keep the WRAP flag)
 	 */
-	status = readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_WRAP;
+	status = fec->tbd_base[fec->tbd_index].status & FEC_TBD_WRAP;
 	status |= FEC_TBD_LAST | FEC_TBD_TC | FEC_TBD_READY;
-	writew(status, &fec->tbd_base[fec->tbd_index].status);
+	fec->tbd_base[fec->tbd_index].status = status;
+
+	/*
+	 * Flush data cache. This code flushes both TX descriptors to RAM.
+	 * After this code, the descriptors will be safely in RAM
+	 * and we can start DMA.
+	 */
+	size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
+	addr = (uint32_t)fec->tbd_base;
+	flush_dcache_range(addr, addr + size);
 
 	/*
 	 * Enable SmartDMA transmit task
@@ -629,11 +697,16 @@  static int fec_send(struct eth_device *dev, volatile void* packet, int length)
 	fec_tx_task_enable(fec);
 
 	/*
-	 * wait until frame is sent .
+	 * Wait until frame is sent. On each turn of the wait cycle, we must
+	 * invalidate data cache to see what's really in RAM. Also, we need
+	 * a barrier here.
 	 */
+	invalidate_dcache_range(addr, addr + size);
 	while (readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_READY) {
 		udelay(1);
+		invalidate_dcache_range(addr, addr + size);
 	}
+
 	debug("fec_send: status 0x%x index %d\n",
 			readw(&fec->tbd_base[fec->tbd_index].status),
 			fec->tbd_index);
@@ -659,6 +732,8 @@  static int fec_recv(struct eth_device *dev)
 	int frame_length, len = 0;
 	struct nbuf *frame;
 	uint16_t bd_status;
+	uint32_t addr, size;
+	int i;
 	uchar buff[FEC_MAX_PKT_SIZE];
 
 	/*
@@ -689,8 +764,23 @@  static int fec_recv(struct eth_device *dev)
 	}
 
 	/*
-	 * ensure reading the right buffer status
+	 * Read the buffer status. Before the status can be read, the data cache
+	 * must be invalidated, because the data in RAM might have been changed
+	 * by DMA. The descriptors are properly aligned to cachelines so there's
+	 * no need to worry they'd overlap.
+	 *
+	 * WARNING: By invalidating the descriptor here, we also invalidate
+	 * the descriptors surrounding this one. Therefore we can NOT change the
+	 * contents of this descriptor nor the surrounding ones. The problem is
+	 * that in order to mark the descriptor as processed, we need to change
+	 * the descriptor. The solution is to mark the whole cache line when all
+	 * descriptors in the cache line are processed.
 	 */
+	addr = (uint32_t)rbd;
+	addr &= ~(CONFIG_FEC_DATA_ALIGNMENT - 1);
+	size = roundup(sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
+	invalidate_dcache_range(addr, addr + size);
+
 	bd_status = readw(&rbd->status);
 	debug("fec_recv: status 0x%x\n", bd_status);
 
@@ -703,6 +793,13 @@  static int fec_recv(struct eth_device *dev)
 			frame = (struct nbuf *)readl(&rbd->data_pointer);
 			frame_length = readw(&rbd->data_length) - 4;
 			/*
+			 * Invalidate data cache over the buffer
+			 */
+			addr = (uint32_t)frame;
+			size = roundup(frame_length, CONFIG_FEC_DATA_ALIGNMENT);
+			invalidate_dcache_range(addr, addr + size);
+
+			/*
 			 *  Fill the buffer and pass it to upper layers
 			 */
 #ifdef	CONFIG_FEC_MXC_SWAP_PACKET
@@ -717,11 +814,27 @@  static int fec_recv(struct eth_device *dev)
 						(ulong)rbd->data_pointer,
 						bd_status);
 		}
+
 		/*
-		 * free the current buffer, restart the engine
-		 * and move forward to the next buffer
+		 * Free the current buffer, restart the engine and move forward
+		 * to the next buffer. Here we check if the whole cacheline of
+		 * descriptors was already processed and if so, we mark it free
+		 * as whole.
 		 */
-		fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd);
+		size = (CONFIG_FEC_DATA_ALIGNMENT / sizeof(struct fec_bd)) - 1;
+		if ((fec->rbd_index & size) == size) {
+			i = fec->rbd_index - size;
+			addr = (uint32_t)&fec->rbd_base[i];
+			for (; i <= fec->rbd_index + size; i++) {
+				if (i == (FEC_RBD_NUM - 1))
+					fec_rbd_clean(1, &fec->rbd_base[i]);
+				else
+					fec_rbd_clean(0, &fec->rbd_base[i]);
+			}
+			flush_dcache_range(addr,
+				addr + CONFIG_FEC_DATA_ALIGNMENT);
+		}
+
 		fec_rx_task_enable(fec);
 		fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM;
 	}
diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h
index 1a20388..76dabaf 100644
--- a/drivers/net/fec_mxc.h
+++ b/drivers/net/fec_mxc.h
@@ -226,22 +226,6 @@  struct ethernet_regs {
 #endif
 
 /**
- * @brief Descriptor buffer alignment
- *
- * i.MX27 requires a 16 byte alignment (but for the first element only)
- */
-#define DB_ALIGNMENT		16
-
-/**
- * @brief Data buffer alignment
- *
- * i.MX27 requires a four byte alignment for transmit and 16 bits
- * alignment for receive so take 16
- * Note: Valid for member data_pointer in struct buffer_descriptor
- */
-#define DB_DATA_ALIGNMENT	16
-
-/**
  * @brief Receive & Transmit Buffer Descriptor definitions
  *
  * Note: The first BD must be aligned (see DB_ALIGNMENT)
@@ -273,8 +257,7 @@  struct fec_priv {
 	struct fec_bd *tbd_base;	/* TBD ring */
 	int tbd_index;			/* next transmit BD to write */
 	bd_t *bd;
-	void *rdb_ptr;
-	void *base_ptr;
+	uint8_t *tdb_ptr;
 	int dev_id;
 	int phy_id;
 	int (*mii_postcall)(int);
-- 
1.7.5.4