diff mbox

[U-Boot,01/10] thunderx: Calculate TCR dynamically

Message ID 1456315904-113924-2-git-send-email-agraf@suse.de
State Superseded
Delegated to: Albert ARIBAUD
Headers show

Commit Message

Alexander Graf Feb. 24, 2016, 12:11 p.m. UTC
Based on the memory map we can determine many of the hard-coded fields of
TCR, such as the maximum VA and PA sizes we want to support. Calculate those
dynamically to reduce the chance of pitfalls.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/arm/cpu/armv8/cache_v8.c    | 59 +++++++++++++++++++++++++++++++++++++++-
 arch/arm/include/asm/armv8/mmu.h |  6 +---
 include/configs/thunderx_88xx.h  |  3 --
 3 files changed, 59 insertions(+), 9 deletions(-)

Comments

Mark Rutland Feb. 24, 2016, 1:37 p.m. UTC | #1
On Wed, Feb 24, 2016 at 01:11:35PM +0100, Alexander Graf wrote:
> Based on the memory map we can determine a lot of hard coded fields of
> TCR, like the maximum VA and max PA we want to support. Calculate those
> dynamically to reduce the chance for pit falls.
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  arch/arm/cpu/armv8/cache_v8.c    | 59 +++++++++++++++++++++++++++++++++++++++-
>  arch/arm/include/asm/armv8/mmu.h |  6 +---
>  include/configs/thunderx_88xx.h  |  3 --
>  3 files changed, 59 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
> index 71f0020..9229532 100644
> --- a/arch/arm/cpu/armv8/cache_v8.c
> +++ b/arch/arm/cpu/armv8/cache_v8.c
> @@ -38,6 +38,58 @@ static struct mm_region mem_map[] = CONFIG_SYS_MEM_MAP;
>  #define PTL1_ENTRIES CONFIG_SYS_PTL1_ENTRIES
>  #define PTL2_ENTRIES CONFIG_SYS_PTL2_ENTRIES
>  
> +static u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
> +{
> +	u64 max_addr = 0;
> +	u64 ips, va_bits;
> +	u64 tcr;
> +	int i;
> +
> +	/* Find the largest address we need to support */
> +	for (i = 0; i < ARRAY_SIZE(mem_map); i++)
> +		max_addr = max(max_addr, mem_map[i].base + mem_map[i].size);
> +
> +	/* Calculate the maximum physical (and thus virtual) address */
> +	if (max_addr > (1ULL << 44)) {
> +		ips = 5;
> +		va_bits = 48;
> +	} else  if (max_addr > (1ULL << 42)) {
> +		ips = 4;
> +		va_bits = 44;
> +	} else  if (max_addr > (1ULL << 40)) {
> +		ips = 3;
> +		va_bits = 42;
> +	} else  if (max_addr > (1ULL << 36)) {
> +		ips = 2;
> +		va_bits = 40;
> +	} else  if (max_addr > (1ULL << 32)) {
> +		ips = 1;
> +		va_bits = 36;
> +	} else {
> +		ips = 0;
> +		va_bits = 32;
> +	}

In Linux we program IPS to the maximum PARange from ID_AA64MMFR0.

If you did the same here you wouldn't have to iterate over all the
memory map entries to determine the maximum PA you care about (though
you may still need to do that for the VA size).

> +
> +	if (el == 1) {
> +		tcr = TCR_EL1_RSVD | (ips << 32);
> +	} else if (el == 2) {
> +		tcr = TCR_EL2_RSVD | (ips << 16);
> +	} else {
> +		tcr = TCR_EL3_RSVD | (ips << 16);
> +	}
> +
> +	/* PTWs cacheable, inner/outer WBWA and inner shareable */
> +	tcr |= TCR_TG0_64K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
> +	tcr |= TCR_T0SZ(VA_BITS);
> +
> +	if (pips)
> +		*pips = ips;
> +	if (pva_bits)
> +		*pva_bits = va_bits;
> +
> +	return tcr;
> +}
> +
>  static void setup_pgtables(void)
>  {
>  	int l1_e, l2_e;
> @@ -110,6 +162,10 @@ __weak void mmu_setup(void)
>  	/* Set up page tables only on BSP */
>  	if (coreid == BSP_COREID)
>  		setup_pgtables();
> +
> +	el = current_el();
> +	set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
> +			  MEMORY_ATTRIBUTES);
>  #else
>  	/* Setup an identity-mapping for all spaces */
>  	for (i = 0; i < (PGTABLE_SIZE >> 3); i++) {
> @@ -128,7 +184,6 @@ __weak void mmu_setup(void)
>  		}
>  	}
>  
> -#endif
>  	/* load TTBR0 */
>  	el = current_el();
>  	if (el == 1) {
> @@ -144,6 +199,8 @@ __weak void mmu_setup(void)
>  				  TCR_EL3_RSVD | TCR_FLAGS | TCR_EL3_IPS_BITS,
>  				  MEMORY_ATTRIBUTES);
>  	}
> +#endif
> +
>  	/* enable the mmu */
>  	set_sctlr(get_sctlr() | CR_M);
>  }
> diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
> index 897f010..39ff745 100644
> --- a/arch/arm/include/asm/armv8/mmu.h
> +++ b/arch/arm/include/asm/armv8/mmu.h
> @@ -159,11 +159,6 @@
>  #define TCR_EL1_IPS_BITS	(UL(3) << 32)	/* 42 bits physical address */
>  #define TCR_EL2_IPS_BITS	(3 << 16)	/* 42 bits physical address */
>  #define TCR_EL3_IPS_BITS	(3 << 16)	/* 42 bits physical address */
> -#else
> -#define TCR_EL1_IPS_BITS	CONFIG_SYS_TCR_EL1_IPS_BITS
> -#define TCR_EL2_IPS_BITS	CONFIG_SYS_TCR_EL2_IPS_BITS
> -#define TCR_EL3_IPS_BITS	CONFIG_SYS_TCR_EL3_IPS_BITS
> -#endif
>  
>  /* PTWs cacheable, inner/outer WBWA and inner shareable */
>  #define TCR_FLAGS		(TCR_TG0_64K |		\
> @@ -171,6 +166,7 @@
>  				TCR_ORGN_WBWA |		\
>  				TCR_IRGN_WBWA |		\
>  				TCR_T0SZ(VA_BITS))
> +#endif
>  
>  #define TCR_EL1_RSVD		(1 << 31)
>  #define TCR_EL2_RSVD		(1 << 31 | 1 << 23)

I suspect you want bit 23 / EPD1 for EL1. Otherwise the core can make
walks starting at whatever junk happens to be in TTBR1.

Thanks,
Mark.

> diff --git a/include/configs/thunderx_88xx.h b/include/configs/thunderx_88xx.h
> index cece4dd..b9f93ad 100644
> --- a/include/configs/thunderx_88xx.h
> +++ b/include/configs/thunderx_88xx.h
> @@ -50,9 +50,6 @@
>  #define CONFIG_SYS_PGTABLE_SIZE		\
>  	((CONFIG_SYS_PTL1_ENTRIES + \
>  	  CONFIG_SYS_MEM_MAP_SIZE * CONFIG_SYS_PTL2_ENTRIES) * 8)
> -#define CONFIG_SYS_TCR_EL1_IPS_BITS	(5UL << 32)
> -#define CONFIG_SYS_TCR_EL2_IPS_BITS	(5 << 16)
> -#define CONFIG_SYS_TCR_EL3_IPS_BITS	(5 << 16)
>  
>  /* Link Definitions */
>  #define CONFIG_SYS_TEXT_BASE		0x00500000
> -- 
> 1.8.5.6
> 
> _______________________________________________
> U-Boot mailing list
> U-Boot@lists.denx.de
> http://lists.denx.de/mailman/listinfo/u-boot
>
Alexander Graf Feb. 24, 2016, 5:39 p.m. UTC | #2
On 02/24/2016 02:37 PM, Mark Rutland wrote:
> On Wed, Feb 24, 2016 at 01:11:35PM +0100, Alexander Graf wrote:
>> Based on the memory map we can determine a lot of hard coded fields of
>> TCR, like the maximum VA and max PA we want to support. Calculate those
>> dynamically to reduce the chance for pit falls.
>>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> ---
>>   arch/arm/cpu/armv8/cache_v8.c    | 59 +++++++++++++++++++++++++++++++++++++++-
>>   arch/arm/include/asm/armv8/mmu.h |  6 +---
>>   include/configs/thunderx_88xx.h  |  3 --
>>   3 files changed, 59 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
>> index 71f0020..9229532 100644
>> --- a/arch/arm/cpu/armv8/cache_v8.c
>> +++ b/arch/arm/cpu/armv8/cache_v8.c
>> @@ -38,6 +38,58 @@ static struct mm_region mem_map[] = CONFIG_SYS_MEM_MAP;
>>   #define PTL1_ENTRIES CONFIG_SYS_PTL1_ENTRIES
>>   #define PTL2_ENTRIES CONFIG_SYS_PTL2_ENTRIES
>>   
>> +static u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
>> +{
>> +	u64 max_addr = 0;
>> +	u64 ips, va_bits;
>> +	u64 tcr;
>> +	int i;
>> +
>> +	/* Find the largest address we need to support */
>> +	for (i = 0; i < ARRAY_SIZE(mem_map); i++)
>> +		max_addr = max(max_addr, mem_map[i].base + mem_map[i].size);
>> +
>> +	/* Calculate the maximum physical (and thus virtual) address */
>> +	if (max_addr > (1ULL << 44)) {
>> +		ips = 5;
>> +		va_bits = 48;
>> +	} else  if (max_addr > (1ULL << 42)) {
>> +		ips = 4;
>> +		va_bits = 44;
>> +	} else  if (max_addr > (1ULL << 40)) {
>> +		ips = 3;
>> +		va_bits = 42;
>> +	} else  if (max_addr > (1ULL << 36)) {
>> +		ips = 2;
>> +		va_bits = 40;
>> +	} else  if (max_addr > (1ULL << 32)) {
>> +		ips = 1;
>> +		va_bits = 36;
>> +	} else {
>> +		ips = 0;
>> +		va_bits = 32;
>> +	}
> In Linux we program IPS to the maximum PARange from ID_AA64MMFR0.
>
> If you did the same here you wouldn't have to iterate over all the
> memory map entries to determine the maximum PA you care about (though
> you may still need to do that for the VA size).

Since we'd want to find the largest number for VA so that we can trim one
level of page table where possible, I don't see how it would buy us much to
take the maximum supported PARange of the core into account.

>
>> +
>> +	if (el == 1) {
>> +		tcr = TCR_EL1_RSVD | (ips << 32);
>> +	} else if (el == 2) {
>> +		tcr = TCR_EL2_RSVD | (ips << 16);
>> +	} else {
>> +		tcr = TCR_EL3_RSVD | (ips << 16);
>> +	}
>> +
>> +	/* PTWs cacheable, inner/outer WBWA and inner shareable */
>> +	tcr |= TCR_TG0_64K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
>> +	tcr |= TCR_T0SZ(VA_BITS);
>> +
>> +	if (pips)
>> +		*pips = ips;
>> +	if (pva_bits)
>> +		*pva_bits = va_bits;
>> +
>> +	return tcr;
>> +}
>> +
>>   static void setup_pgtables(void)
>>   {
>>   	int l1_e, l2_e;
>> @@ -110,6 +162,10 @@ __weak void mmu_setup(void)
>>   	/* Set up page tables only on BSP */
>>   	if (coreid == BSP_COREID)
>>   		setup_pgtables();
>> +
>> +	el = current_el();
>> +	set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
>> +			  MEMORY_ATTRIBUTES);
>>   #else
>>   	/* Setup an identity-mapping for all spaces */
>>   	for (i = 0; i < (PGTABLE_SIZE >> 3); i++) {
>> @@ -128,7 +184,6 @@ __weak void mmu_setup(void)
>>   		}
>>   	}
>>   
>> -#endif
>>   	/* load TTBR0 */
>>   	el = current_el();
>>   	if (el == 1) {
>> @@ -144,6 +199,8 @@ __weak void mmu_setup(void)
>>   				  TCR_EL3_RSVD | TCR_FLAGS | TCR_EL3_IPS_BITS,
>>   				  MEMORY_ATTRIBUTES);
>>   	}
>> +#endif
>> +
>>   	/* enable the mmu */
>>   	set_sctlr(get_sctlr() | CR_M);
>>   }
>> diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
>> index 897f010..39ff745 100644
>> --- a/arch/arm/include/asm/armv8/mmu.h
>> +++ b/arch/arm/include/asm/armv8/mmu.h
>> @@ -159,11 +159,6 @@
>>   #define TCR_EL1_IPS_BITS	(UL(3) << 32)	/* 42 bits physical address */
>>   #define TCR_EL2_IPS_BITS	(3 << 16)	/* 42 bits physical address */
>>   #define TCR_EL3_IPS_BITS	(3 << 16)	/* 42 bits physical address */
>> -#else
>> -#define TCR_EL1_IPS_BITS	CONFIG_SYS_TCR_EL1_IPS_BITS
>> -#define TCR_EL2_IPS_BITS	CONFIG_SYS_TCR_EL2_IPS_BITS
>> -#define TCR_EL3_IPS_BITS	CONFIG_SYS_TCR_EL3_IPS_BITS
>> -#endif
>>   
>>   /* PTWs cacheable, inner/outer WBWA and inner shareable */
>>   #define TCR_FLAGS		(TCR_TG0_64K |		\
>> @@ -171,6 +166,7 @@
>>   				TCR_ORGN_WBWA |		\
>>   				TCR_IRGN_WBWA |		\
>>   				TCR_T0SZ(VA_BITS))
>> +#endif
>>   
>>   #define TCR_EL1_RSVD		(1 << 31)
>>   #define TCR_EL2_RSVD		(1 << 31 | 1 << 23)
> I suspect you want bit 23 / EPD1 for EL1. Otherwise the core can make
> walks starting at whatever junk happens to be in TTBR1.

Yes. Definitely. It's not reserved though, we have to add it in another 
place, but nice catch!


Alex
Mark Rutland Feb. 25, 2016, 11:58 a.m. UTC | #3
On Wed, Feb 24, 2016 at 06:39:22PM +0100, Alexander Graf wrote:
> On 02/24/2016 02:37 PM, Mark Rutland wrote:
> >On Wed, Feb 24, 2016 at 01:11:35PM +0100, Alexander Graf wrote:
> >>+	/* Calculate the maximum physical (and thus virtual) address */
> >>+	if (max_addr > (1ULL << 44)) {
> >>+		ips = 5;
> >>+		va_bits = 48;
> >>+	} else  if (max_addr > (1ULL << 42)) {
> >>+		ips = 4;
> >>+		va_bits = 44;
> >>+	} else  if (max_addr > (1ULL << 40)) {
> >>+		ips = 3;
> >>+		va_bits = 42;
> >>+	} else  if (max_addr > (1ULL << 36)) {
> >>+		ips = 2;
> >>+		va_bits = 40;
> >>+	} else  if (max_addr > (1ULL << 32)) {
> >>+		ips = 1;
> >>+		va_bits = 36;
> >>+	} else {
> >>+		ips = 0;
> >>+		va_bits = 32;
> >>+	}
> >In Linux we program IPS to the maximum PARange from ID_AA64MMFR0.
> >
> >If you did the same here you wouldn't have to iterate over all the
> >memory map entries to determine the maximum PA you care about (though
> >you may still need to do that for the VA size).
> 
> Since we'd want to find the largest number for VA to trim one level
> of page table if we can, I don't see how it would buy is much to
> take the maximum supported PARange of the core into account.

It would simply be a saving of lines, as you'd program the same IPS
value regardless of max_addr (and you have to expect that PARange is
sufficient regardless).

Otherwise, yes, it doesn't buy you anything.

Thanks,
Mark.
diff mbox

Patch

diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index 71f0020..9229532 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -38,6 +38,58 @@  static struct mm_region mem_map[] = CONFIG_SYS_MEM_MAP;
 #define PTL1_ENTRIES CONFIG_SYS_PTL1_ENTRIES
 #define PTL2_ENTRIES CONFIG_SYS_PTL2_ENTRIES
 
+/* Build TCR for @el from mem_map; optionally report IPS and VA bits used */
+static u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
+{
+	u64 max_addr = 0;
+	u64 ips, va_bits, tcr;
+	int i;
+
+	/* Find the largest address we need to support */
+	for (i = 0; i < ARRAY_SIZE(mem_map); i++)
+		max_addr = max(max_addr, mem_map[i].base + mem_map[i].size);
+
+	/* Pick the smallest IPS encoding and VA width that cover max_addr */
+	if (max_addr > (1ULL << 44)) {
+		ips = 5;
+		va_bits = 48;
+	} else if (max_addr > (1ULL << 42)) {
+		ips = 4;
+		va_bits = 44;
+	} else if (max_addr > (1ULL << 40)) {
+		ips = 3;
+		va_bits = 42;
+	} else if (max_addr > (1ULL << 36)) {
+		ips = 2;
+		va_bits = 40;
+	} else if (max_addr > (1ULL << 32)) {
+		ips = 1;
+		va_bits = 36;
+	} else {
+		ips = 0;
+		va_bits = 32;
+	}
+
+	/* The size field is placed at bit 32 for EL1 and at bit 16 for EL2/EL3 */
+	if (el == 1)
+		tcr = TCR_EL1_RSVD | (ips << 32);
+	else if (el == 2)
+		tcr = TCR_EL2_RSVD | (ips << 16);
+	else
+		tcr = TCR_EL3_RSVD | (ips << 16);
+
+	/* PTWs cacheable, inner/outer WBWA and inner shareable */
+	tcr |= TCR_TG0_64K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
+	tcr |= TCR_T0SZ(va_bits);
+
+	if (pips)
+		*pips = ips;
+	if (pva_bits)
+		*pva_bits = va_bits;
+
+	return tcr;
+}
+
 static void setup_pgtables(void)
 {
 	int l1_e, l2_e;
@@ -110,6 +162,10 @@  __weak void mmu_setup(void)
 	/* Set up page tables only on BSP */
 	if (coreid == BSP_COREID)
 		setup_pgtables();
+
+	el = current_el();
+	set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
+			  MEMORY_ATTRIBUTES);
 #else
 	/* Setup an identity-mapping for all spaces */
 	for (i = 0; i < (PGTABLE_SIZE >> 3); i++) {
@@ -128,7 +184,6 @@  __weak void mmu_setup(void)
 		}
 	}
 
-#endif
 	/* load TTBR0 */
 	el = current_el();
 	if (el == 1) {
@@ -144,6 +199,8 @@  __weak void mmu_setup(void)
 				  TCR_EL3_RSVD | TCR_FLAGS | TCR_EL3_IPS_BITS,
 				  MEMORY_ATTRIBUTES);
 	}
+#endif
+
 	/* enable the mmu */
 	set_sctlr(get_sctlr() | CR_M);
 }
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
index 897f010..39ff745 100644
--- a/arch/arm/include/asm/armv8/mmu.h
+++ b/arch/arm/include/asm/armv8/mmu.h
@@ -159,11 +159,6 @@ 
 #define TCR_EL1_IPS_BITS	(UL(3) << 32)	/* 42 bits physical address */
 #define TCR_EL2_IPS_BITS	(3 << 16)	/* 42 bits physical address */
 #define TCR_EL3_IPS_BITS	(3 << 16)	/* 42 bits physical address */
-#else
-#define TCR_EL1_IPS_BITS	CONFIG_SYS_TCR_EL1_IPS_BITS
-#define TCR_EL2_IPS_BITS	CONFIG_SYS_TCR_EL2_IPS_BITS
-#define TCR_EL3_IPS_BITS	CONFIG_SYS_TCR_EL3_IPS_BITS
-#endif
 
 /* PTWs cacheable, inner/outer WBWA and inner shareable */
 #define TCR_FLAGS		(TCR_TG0_64K |		\
@@ -171,6 +166,7 @@ 
 				TCR_ORGN_WBWA |		\
 				TCR_IRGN_WBWA |		\
 				TCR_T0SZ(VA_BITS))
+#endif
 
 #define TCR_EL1_RSVD		(1 << 31)
 #define TCR_EL2_RSVD		(1 << 31 | 1 << 23)
diff --git a/include/configs/thunderx_88xx.h b/include/configs/thunderx_88xx.h
index cece4dd..b9f93ad 100644
--- a/include/configs/thunderx_88xx.h
+++ b/include/configs/thunderx_88xx.h
@@ -50,9 +50,6 @@ 
 #define CONFIG_SYS_PGTABLE_SIZE		\
 	((CONFIG_SYS_PTL1_ENTRIES + \
 	  CONFIG_SYS_MEM_MAP_SIZE * CONFIG_SYS_PTL2_ENTRIES) * 8)
-#define CONFIG_SYS_TCR_EL1_IPS_BITS	(5UL << 32)
-#define CONFIG_SYS_TCR_EL2_IPS_BITS	(5 << 16)
-#define CONFIG_SYS_TCR_EL3_IPS_BITS	(5 << 16)
 
 /* Link Definitions */
 #define CONFIG_SYS_TEXT_BASE		0x00500000