[U-Boot,3/3] sunxi: H6: use writel_relaxed for DRAM timing register accesses

Message ID 20190111003121.12360-4-andre.przywara@arm.com
State New
Delegated to: Jagannadha Sutradharudu Teki
Headers show
Series
  • arm: Introduce writel/readl_relaxed accessors
Related show

Commit Message

André Przywara Jan. 11, 2019, 12:31 a.m.
The timing registers in the DRAM controller can be programmed in any
order, as they will only take effect once the controller is eventually
"activated".

Switch the MMIO writes in mctl_set_timing_lpddr3() and
mctl_bit_delay_set() over to use writel_relaxed(), since we don't need
the stronger guarantee of the normal writel(). We satisfy the overall
ordering requirement by ending each function with an explicit DMB
barrier.

In this case we are not interested in the performance benefit this
usually gives, but in the saved instructions, which add up over the many
writes we have in the timing setup.
Due to alignment effects this shrinks our chronically tight H6 SPL by a
whopping 2KB, which brings it into the same region as the other
AArch64 Allwinner SPL builds.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arch/arm/mach-sunxi/dram_sun50i_h6.c | 79 +++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 37 deletions(-)

Patch

diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c
index 5da90a2835..84a33a63d6 100644
--- a/arch/arm/mach-sunxi/dram_sun50i_h6.c
+++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c
@@ -241,51 +241,55 @@  static void mctl_set_timing_lpddr3(struct dram_para *para)
 	memcpy(mctl_phy->mr, mr_lpddr3, sizeof(mr_lpddr3));
 
 	/* set DRAM timing */
-	writel((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras,
-	       &mctl_ctl->dramtmg[0]);
-	writel((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]);
-	writel((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd,
-	       &mctl_ctl->dramtmg[2]);
-	writel((tmrw << 20) | (tmrd << 12) | tmod, &mctl_ctl->dramtmg[3]);
-	writel((trcd << 24) | (tccd << 16) | (trrd << 8) | trp,
-	       &mctl_ctl->dramtmg[4]);
-	writel((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke,
-	       &mctl_ctl->dramtmg[5]);
+	writel_relaxed((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras,
+		       &mctl_ctl->dramtmg[0]);
+	writel_relaxed((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]);
+	writel_relaxed((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd,
+		       &mctl_ctl->dramtmg[2]);
+	writel_relaxed((tmrw << 20) | (tmrd << 12) | tmod,
+		       &mctl_ctl->dramtmg[3]);
+	writel_relaxed((trcd << 24) | (tccd << 16) | (trrd << 8) | trp,
+		       &mctl_ctl->dramtmg[4]);
+	writel_relaxed((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke,
+		       &mctl_ctl->dramtmg[5]);
 	/* Value suggested by ZynqMP manual and used by libdram */
-	writel((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]);
-	writel((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs,
-	       &mctl_ctl->dramtmg[8]);
-	writel(txsr, &mctl_ctl->dramtmg[14]);
+	writel_relaxed((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]);
+	writel_relaxed((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs,
+		       &mctl_ctl->dramtmg[8]);
+	writel_relaxed(txsr, &mctl_ctl->dramtmg[14]);
 
 	clrsetbits_le32(&mctl_ctl->init[0], (3 << 30), (1 << 30));
-	writel(0, &mctl_ctl->dfimisc);
+	writel_relaxed(0, &mctl_ctl->dfimisc);
 	clrsetbits_le32(&mctl_ctl->rankctl, 0xff0, 0x660);
 
 	/*
 	 * Set timing registers of the PHY.
 	 * Note: the PHY is clocked 2x from the DRAM frequency.
 	 */
-	writel((trrd << 25) | (tras << 17) | (trp << 9) | (trtp << 1),
+	writel_relaxed((trrd << 25) | (tras << 17) | (trp << 9) | (trtp << 1),
 	       &mctl_phy->dtpr[0]);
-	writel((tfaw << 17) | 0x28000400 | (tmrd << 1), &mctl_phy->dtpr[1]);
-	writel(((txs << 6) - 1) | (tcke << 17), &mctl_phy->dtpr[2]);
-	writel(((txsdll << 22) - (0x1 << 16)) | twtr_sa | (tcksrea << 8),
-	       &mctl_phy->dtpr[3]);
-	writel((txp << 1) | (trfc << 17) | 0x800, &mctl_phy->dtpr[4]);
-	writel((trc << 17) | (trcd << 9) | (twtr << 1), &mctl_phy->dtpr[5]);
-	writel(0x0505, &mctl_phy->dtpr[6]);
+	writel_relaxed((tfaw << 17) | 0x28000400 | (tmrd << 1),
+		       &mctl_phy->dtpr[1]);
+	writel_relaxed(((txs << 6) - 1) | (tcke << 17), &mctl_phy->dtpr[2]);
+	writel_relaxed(((txsdll << 22) - (0x1 << 16)) | twtr_sa |
+		       (tcksrea << 8), &mctl_phy->dtpr[3]);
+	writel_relaxed((txp << 1) | (trfc << 17) | 0x800, &mctl_phy->dtpr[4]);
+	writel_relaxed((trc << 17) | (trcd << 9) | (twtr << 1),
+		       &mctl_phy->dtpr[5]);
+	writel_relaxed(0x0505, &mctl_phy->dtpr[6]);
 
 	/* Configure DFI timing */
-	writel(tcl | 0x2000200 | (t_rdata_en << 16) | 0x808000,
-	       &mctl_ctl->dfitmg0);
-	writel(0x040201, &mctl_ctl->dfitmg1);
+	writel_relaxed(tcl | 0x2000200 | (t_rdata_en << 16) | 0x808000,
+		       &mctl_ctl->dfitmg0);
+	writel_relaxed(0x040201, &mctl_ctl->dfitmg1);
 
 	/* Configure PHY timing */
-	writel(tdinit0 | (tdinit1 << 20), &mctl_phy->ptr[3]);
-	writel(tdinit2 | (tdinit3 << 18), &mctl_phy->ptr[4]);
+	writel_relaxed(tdinit0 | (tdinit1 << 20), &mctl_phy->ptr[3]);
+	writel_relaxed(tdinit2 | (tdinit3 << 18), &mctl_phy->ptr[4]);
 
 	/* set refresh timing */
-	writel((trefi << 16) | trfc, &mctl_ctl->rfshtmg);
+	writel_relaxed((trefi << 16) | trfc, &mctl_ctl->rfshtmg);
+	DMB;
 }
 
 static void mctl_sys_init(struct dram_para *para)
@@ -476,17 +480,17 @@  static void mctl_bit_delay_set(struct dram_para *para)
 		val = readl(&mctl_phy->dx[i].bdlr0);
 		for (j = 0; j < 4; j++)
 			val += para->dx_write_delays[i][j] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr0);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr0);
 
 		val = readl(&mctl_phy->dx[i].bdlr1);
 		for (j = 0; j < 4; j++)
 			val += para->dx_write_delays[i][j + 4] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr1);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr1);
 
 		val = readl(&mctl_phy->dx[i].bdlr2);
 		for (j = 0; j < 4; j++)
 			val += para->dx_write_delays[i][j + 8] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr2);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr2);
 	}
 	clrbits_le32(&mctl_phy->pgcr[0], BIT(26));
 
@@ -494,22 +498,22 @@  static void mctl_bit_delay_set(struct dram_para *para)
 		val = readl(&mctl_phy->dx[i].bdlr3);
 		for (j = 0; j < 4; j++)
 			val += para->dx_read_delays[i][j] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr3);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr3);
 
 		val = readl(&mctl_phy->dx[i].bdlr4);
 		for (j = 0; j < 4; j++)
 			val += para->dx_read_delays[i][j + 4] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr4);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr4);
 
 		val = readl(&mctl_phy->dx[i].bdlr5);
 		for (j = 0; j < 4; j++)
 			val += para->dx_read_delays[i][j + 8] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr5);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr5);
 
 		val = readl(&mctl_phy->dx[i].bdlr6);
 		val += (para->dx_read_delays[i][12] << 8) |
 		       (para->dx_read_delays[i][13] << 16);
-		writel(val, &mctl_phy->dx[i].bdlr6);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr6);
 	}
 	setbits_le32(&mctl_phy->pgcr[0], BIT(26));
 	udelay(1);
@@ -517,8 +521,9 @@  static void mctl_bit_delay_set(struct dram_para *para)
 	for (i = 1; i < 14; i++) {
 		val = readl(&mctl_phy->acbdlr[i]);
 		val += 0x0a0a0a0a;
-		writel(val, &mctl_phy->acbdlr[i]);
+		writel_relaxed(val, &mctl_phy->acbdlr[i]);
 	}
+	DMB;
 }
 
 static void mctl_channel_init(struct dram_para *para)