diff mbox

[U-Boot,4/9] tegra: Replace home grown mmu code with generic table approach

Message ID 1456106232-233210-5-git-send-email-agraf@suse.de
State Superseded
Headers show

Commit Message

Alexander Graf Feb. 22, 2016, 1:57 a.m. UTC
Now that we have nice table driven page table creating code that gives
us everything we need, move to that.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/arm/mach-tegra/Makefile      |   1 -
 arch/arm/mach-tegra/arm64-mmu.c   | 131 --------------------------------------
 include/configs/tegra210-common.h |  16 +++++
 3 files changed, 16 insertions(+), 132 deletions(-)
 delete mode 100644 arch/arm/mach-tegra/arm64-mmu.c

Comments

Stephen Warren Feb. 22, 2016, 6:28 p.m. UTC | #1
On 02/21/2016 06:57 PM, Alexander Graf wrote:
> Now that we have nice table driven page table creating code that gives
> us everything we need, move to that.

> diff --git a/include/configs/tegra210-common.h b/include/configs/tegra210-common.h

> +#define CONFIG_SYS_FULL_VA
> +#define CONFIG_SYS_MEM_MAP {						\
> +	{								\
> +		.base = 0x0UL,						\
> +		.size = 0x80000000UL,					\
> +		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |		\
> +			 PTE_BLOCK_NON_SHARE |				\
> +			 PTE_BLOCK_PXN | PTE_BLOCK_UXN			\
> +	}, {								\
> +		.base = 0x80000000UL,					\
> +		.size = 0xff80000000UL,					\
> +		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |			\
> +			 PTE_BLOCK_INNER_SHARE				\
> +	},								\
> +	}

I'd prefer a layout that didn't align the closing } for different 
nesting levels in the same column. To avoid indenting everything a lot, 
it seems simplest to pull the final } back into the first column.

I believe the .size field of the second entry in the array only needs to 
be 0x80000000. Testing with a PCIe Ethernet card on p2371-2180 (the 
driver for which sets up noncached entries in the page tables, hence 
should exercise all this code) confirms that.

While recent Tegra systems do support more than 2GB of RAM, U-Boot will 
itself only use the first 2GB, so that PAs over 4GB are not used. See 
board_get_usable_ram_top() in arch/arm/mach-tegra/board2.c. That's 
because some peripherals can only access 32-bit PAs, and the simplest 
way to accommodate that is to ignore any RAM above the 32-bit limit.
Michal Simek Feb. 23, 2016, 10:37 a.m. UTC | #2
On 22.2.2016 19:28, Stephen Warren wrote:
> On 02/21/2016 06:57 PM, Alexander Graf wrote:
>> Now that we have nice table driven page table creating code that gives
>> us everything we need, move to that.
> 
>> diff --git a/include/configs/tegra210-common.h
>> b/include/configs/tegra210-common.h
> 
>> +#define CONFIG_SYS_FULL_VA
>> +#define CONFIG_SYS_MEM_MAP {                        \
>> +    {                                \
>> +        .base = 0x0UL,                        \
>> +        .size = 0x80000000UL,                    \
>> +        .attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |        \
>> +             PTE_BLOCK_NON_SHARE |                \
>> +             PTE_BLOCK_PXN | PTE_BLOCK_UXN            \
>> +    }, {                                \
>> +        .base = 0x80000000UL,                    \
>> +        .size = 0xff80000000UL,                    \
>> +        .attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |            \
>> +             PTE_BLOCK_INNER_SHARE                \
>> +    },                                \
>> +    }
> 
> I'd prefer a layout that didn't align the closing } for different
> nesting levels in the same column. To avoid indenting everything a lot,
> it seems simplest to pull the final } back into the first column.
> 
> I believe the .size field of the second entry in the array only needs to
> be 0x80000000. Testing with a PCIe Ethernet card on p2371-2180 (the
> driver for which sets up noncached entries in the page tables, hence
> should exercise all this code) confirms that.
> 
> While recent Tegra systems do support more than 2GB of RAM, U-Boot will
> itself only use the first 2GB, so that PAs over 4GB are not used. See
> board_get_usable_ram_top() in arch/arm/mach-tegra/board2.c. That's
> because some peripherals can only access 32-bit PAs, and the simplest
> way to accommodate that is to ignore any RAM above the 32-bit limit.

Didn't you use mtest to test memory above 2GB?

Thanks,
Michal
Stephen Warren Feb. 23, 2016, 5:29 p.m. UTC | #3
On 02/23/2016 03:37 AM, Michal Simek wrote:
> On 22.2.2016 19:28, Stephen Warren wrote:
>> On 02/21/2016 06:57 PM, Alexander Graf wrote:
>>> Now that we have nice table driven page table creating code that gives
>>> us everything we need, move to that.
>>
>>> diff --git a/include/configs/tegra210-common.h
>>> b/include/configs/tegra210-common.h
>>
>>> +#define CONFIG_SYS_FULL_VA
>>> +#define CONFIG_SYS_MEM_MAP {                        \
>>> +    {                                \
>>> +        .base = 0x0UL,                        \
>>> +        .size = 0x80000000UL,                    \
>>> +        .attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |        \
>>> +             PTE_BLOCK_NON_SHARE |                \
>>> +             PTE_BLOCK_PXN | PTE_BLOCK_UXN            \
>>> +    }, {                                \
>>> +        .base = 0x80000000UL,                    \
>>> +        .size = 0xff80000000UL,                    \
>>> +        .attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |            \
>>> +             PTE_BLOCK_INNER_SHARE                \
>>> +    },                                \
>>> +    }
>>
>> I'd prefer a layout that didn't align the closing } for different
>> nesting levels in the same column. To avoid indenting everything a lot,
>> it seems simplest to pull the final } back into the first column.
>>
>> I believe the .size field of the second entry in the array only needs to
>> be 0x80000000. Testing with a PCIe Ethernet card on p2371-2180 (the
>> driver for which sets up noncached entries in the page tables, hence
>> should exercise all this code) confirms that.
>>
>> While recent Tegra systems do support more than 2GB of RAM, U-Boot will
>> itself only use the first 2GB, so that PAs over 4GB are not used. See
>> board_get_usable_ram_top() in arch/arm/mach-tegra/board2.c. That's
>> because some peripherals can only access 32-bit PAs, and the simplest
>> way to accommodate that is to ignore any RAM above the 32-bit limit.
>
> Didn't you use mtest to test memory above of 2GB?

It looks like we don't have mtest enabled.

However, I was able to use itest to confirm that RAM > 4GB PA does work 
with this patch. I suppose we may as well leave it enabled then.
Alexander Graf Feb. 24, 2016, 10:28 a.m. UTC | #4
On 22.02.16 19:28, Stephen Warren wrote:
> On 02/21/2016 06:57 PM, Alexander Graf wrote:
>> Now that we have nice table driven page table creating code that gives
>> us everything we need, move to that.
> 
>> diff --git a/include/configs/tegra210-common.h
>> b/include/configs/tegra210-common.h
> 
>> +#define CONFIG_SYS_FULL_VA
>> +#define CONFIG_SYS_MEM_MAP {                        \
>> +    {                                \
>> +        .base = 0x0UL,                        \
>> +        .size = 0x80000000UL,                    \
>> +        .attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |        \
>> +             PTE_BLOCK_NON_SHARE |                \
>> +             PTE_BLOCK_PXN | PTE_BLOCK_UXN            \
>> +    }, {                                \
>> +        .base = 0x80000000UL,                    \
>> +        .size = 0xff80000000UL,                    \
>> +        .attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |            \
>> +             PTE_BLOCK_INNER_SHARE                \
>> +    },                                \
>> +    }
> 
> I'd prefer a layout that didn't align the closing } for different
> nesting levels in the same column. To avoid indenting everything a lot,
> it seems simplest to pull the final } back into the first column.

It seems people want this as structs in board files rather than as a
#define in a header anyway, to allow more flexible code and to make it
possible to build the table from the device tree. So there we have
natural } alignment again :).

> 
> I believe the .size field of the second entry in the array only needs to
> be 0x80000000. Testing with a PCIe Ethernet card on p2371-2180 (the
> driver for which sets up noncached entries in the page tables, hence
> should exercise all this code) confirms that.

I was surprised to see the full map in your code too, but I wanted to
make this patch with as little behavioral change as possible (for bisect
reasons). So the page table that gets constructed before and after
should be almost identical.

If we want to change behavior later on, I'd much rather see that
in a follow-up patch independent of this set.


Alex

> While recent Tegra systems do support more than 2GB of RAM, U-Boot will
> itself only use the first 2GB, so that PAs over 4GB are not used. See
> board_get_usable_ram_top() in arch/arm/mach-tegra/board2.c. That's
> because some peripherals can only access 32-bit PAs, and the simplest
> way to accommodate that is to ignore any RAM above the 32-bit limit.
diff mbox

Patch

diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile
index b2dbc69..31dd526 100644
--- a/arch/arm/mach-tegra/Makefile
+++ b/arch/arm/mach-tegra/Makefile
@@ -14,7 +14,6 @@  else
 obj-$(CONFIG_CMD_ENTERRCM) += cmd_enterrcm.o
 endif
 
-obj-$(CONFIG_ARM64) += arm64-mmu.o
 obj-y += ap.o
 obj-y += board.o board2.o
 obj-y += cache.o
diff --git a/arch/arm/mach-tegra/arm64-mmu.c b/arch/arm/mach-tegra/arm64-mmu.c
deleted file mode 100644
index c227652..0000000
--- a/arch/arm/mach-tegra/arm64-mmu.c
+++ /dev/null
@@ -1,131 +0,0 @@ 
-/*
- * (C) Copyright 2014 - 2015 Xilinx, Inc.
- * Michal Simek <michal.simek@xilinx.com>
- * (This file derived from arch/arm/cpu/armv8/zynqmp/cpu.c)
- *
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
- *
- * SPDX-License-Identifier:	GPL-2.0+
- */
-
-#include <common.h>
-#include <asm/system.h>
-#include <asm/armv8/mmu.h>
-
-DECLARE_GLOBAL_DATA_PTR;
-
-#define SECTION_SHIFT_L1	30UL
-#define SECTION_SHIFT_L2	21UL
-#define BLOCK_SIZE_L0		0x8000000000UL
-#define BLOCK_SIZE_L1		(1 << SECTION_SHIFT_L1)
-#define BLOCK_SIZE_L2		(1 << SECTION_SHIFT_L2)
-
-#define TCR_TG1_4K		(1 << 31)
-#define TCR_EPD1_DISABLE	(1 << 23)
-#define TEGRA_VA_BITS		40
-#define TEGRA_TCR		TCR_TG1_4K | \
-				TCR_EPD1_DISABLE | \
-				TCR_SHARED_OUTER | \
-				TCR_SHARED_INNER | \
-				TCR_IRGN_WBWA | \
-				TCR_ORGN_WBWA | \
-				TCR_T0SZ(TEGRA_VA_BITS)
-
-#define MEMORY_ATTR	PMD_SECT_AF | PMD_SECT_INNER_SHARE |	\
-			PMD_ATTRINDX(MT_NORMAL) |	\
-			PMD_TYPE_SECT
-#define DEVICE_ATTR	PMD_SECT_AF | PMD_SECT_PXN |	\
-			PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_NGNRNE) |	\
-			PMD_TYPE_SECT
-
-/* 4K size is required to place 512 entries in each level */
-#define TLB_TABLE_SIZE	0x1000
-
-/*
- * This mmu table looks as below
- * Level 0 table contains two entries to 512GB sizes. One is Level1 Table 0
- * and other Level1 Table1.
- * Level1 Table0 contains entries for each 1GB from 0 to 511GB.
- * Level1 Table1 contains entries for each 1GB from 512GB to 1TB.
- * Level2 Table0, Level2 Table1, Level2 Table2 and Level2 Table3 contains
- * entries for each 2MB starting from 0GB, 1GB, 2GB and 3GB respectively.
- */
-void mmu_setup(void)
-{
-	int el;
-	u64 i, section_l1t0, section_l1t1;
-	u64 section_l2t0, section_l2t1, section_l2t2, section_l2t3;
-	u64 *level0_table = (u64 *)gd->arch.tlb_addr;
-	u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + TLB_TABLE_SIZE);
-	u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + (2 * TLB_TABLE_SIZE));
-	u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + (3 * TLB_TABLE_SIZE));
-	u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + (4 * TLB_TABLE_SIZE));
-	u64 *level2_table_2 = (u64 *)(gd->arch.tlb_addr + (5 * TLB_TABLE_SIZE));
-	u64 *level2_table_3 = (u64 *)(gd->arch.tlb_addr + (6 * TLB_TABLE_SIZE));
-
-	/* Invalidate all table entries */
-	memset(level0_table, 0, PGTABLE_SIZE);
-
-	level0_table[0] =
-		(u64)level1_table_0 | PMD_TYPE_TABLE;
-	level0_table[1] =
-		(u64)level1_table_1 | PMD_TYPE_TABLE;
-
-	/*
-	 * set level 1 table 0, covering 0 to 512GB
-	 * set level 1 table 1, covering 512GB to 1TB
-	 */
-	section_l1t0 = 0;
-	section_l1t1 = BLOCK_SIZE_L0;
-
-	for (i = 0; i < 512; i++) {
-		level1_table_0[i] = section_l1t0;
-		if (i >= 4)
-			level1_table_0[i] |= MEMORY_ATTR;
-		level1_table_1[i] = section_l1t1;
-		level1_table_1[i] |= MEMORY_ATTR;
-		section_l1t0 += BLOCK_SIZE_L1;
-		section_l1t1 += BLOCK_SIZE_L1;
-	}
-
-	level1_table_0[0] =
-		(u64)level2_table_0 | PMD_TYPE_TABLE;
-	level1_table_0[1] =
-		(u64)level2_table_1 | PMD_TYPE_TABLE;
-	level1_table_0[2] =
-		(u64)level2_table_2 | PMD_TYPE_TABLE;
-	level1_table_0[3] =
-		(u64)level2_table_3 | PMD_TYPE_TABLE;
-
-	section_l2t0 = 0;
-	section_l2t1 = section_l2t0 + BLOCK_SIZE_L1; /* 1GB */
-	section_l2t2 = section_l2t1 + BLOCK_SIZE_L1; /* 2GB */
-	section_l2t3 = section_l2t2 + BLOCK_SIZE_L1; /* 3GB */
-
-	for (i = 0; i < 512; i++) {
-		level2_table_0[i] = section_l2t0 | DEVICE_ATTR;
-		level2_table_1[i] = section_l2t1 | DEVICE_ATTR;
-		level2_table_2[i] = section_l2t2 | MEMORY_ATTR;
-		level2_table_3[i] = section_l2t3 | MEMORY_ATTR;
-		section_l2t0 += BLOCK_SIZE_L2;
-		section_l2t1 += BLOCK_SIZE_L2;
-		section_l2t2 += BLOCK_SIZE_L2;
-		section_l2t3 += BLOCK_SIZE_L2;
-	}
-
-	/* flush new MMU table */
-	flush_dcache_range(gd->arch.tlb_addr,
-			   gd->arch.tlb_addr + gd->arch.tlb_size);
-
-	/* point TTBR to the new table */
-	el = current_el();
-	set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
-			  TEGRA_TCR, MEMORY_ATTRIBUTES);
-
-	set_sctlr(get_sctlr() | CR_M);
-}
-
-u64 *arch_get_page_table(void)
-{
-	return (u64 *)(gd->arch.tlb_addr + (3 * TLB_TABLE_SIZE));
-}
diff --git a/include/configs/tegra210-common.h b/include/configs/tegra210-common.h
index 8f35a7b..5a664b3 100644
--- a/include/configs/tegra210-common.h
+++ b/include/configs/tegra210-common.h
@@ -13,6 +13,22 @@ 
 /* Cortex-A57 uses a cache line size of 64 bytes */
 #define CONFIG_SYS_CACHELINE_SIZE	64
 
+#define CONFIG_SYS_FULL_VA
+#define CONFIG_SYS_MEM_MAP {						\
+	{								\
+		.base = 0x0UL,						\
+		.size = 0x80000000UL,					\
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |		\
+			 PTE_BLOCK_NON_SHARE |				\
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN			\
+	}, {								\
+		.base = 0x80000000UL,					\
+		.size = 0xff80000000UL,					\
+		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |			\
+			 PTE_BLOCK_INNER_SHARE				\
+	},								\
+	}
+
 /*
  * NS16550 Configuration
  */