diff mbox

[U-Boot,v3,16/66] armv8: move low-level assembly functions into function-sections

Message ID 1501269764-13969-17-git-send-email-philipp.tomsich@theobroma-systems.com
State Superseded
Delegated to: Philipp Tomsich
Headers show

Commit Message

Philipp Tomsich July 28, 2017, 7:21 p.m. UTC
TPL builds today don't need to call into firmware or set up the MMU
(if this changes, it should be controlled through a config option
whether to include this or not), but include the needed support code
for this anyway.  By moving these unused low-level functions into
separate function-sections, the linker can garbage-collect the unused
sections.

Note that (if DM support is enabled), there will be a call to the
cache-flushing code from alloc_priv(...) in drivers/core/device.c.
This then adds 52 bytes of binary size (an increase from 20589 to 20641
bytes) compared to completely removing this code.

Even for a feature-rich TPL (including DM support as for the RK3368),
this equates to a size difference of significantly more than 10% in
TPL binary size.

Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>

---

Changes in v3:
- change to use function-sections (instead of disabling at the
  Makefile-level for TPL builds) per Tom's suggestion

Changes in v2: None

 arch/arm/cpu/armv8/Makefile     |  2 +-
 arch/arm/cpu/armv8/cache.S      | 22 ++++++++++++++++++++++
 arch/arm/cpu/armv8/tlb.S        |  4 +++-
 arch/arm/cpu/armv8/transition.S |  6 ++++++
 4 files changed, 32 insertions(+), 2 deletions(-)

Comments

Simon Glass Aug. 1, 2017, 9:49 a.m. UTC | #1
On 28 July 2017 at 13:21, Philipp Tomsich
<philipp.tomsich@theobroma-systems.com> wrote:
> TPL builds today don't need to call into firmware or set up the MMU
> (if this changes, it should be controlled through a config option
> whether to include this or not), but include the needed support code
> for this anyway.  By moving these unused low-level functions into
> separate function-sections, the linker can garbage-collect the unused
> sections.
>
> Note that (if DM support is enabled), there will be a call to the
> cache-flushing code from alloc_priv(...) in drivers/core/device.c.
> This then adds 52 bytes of binary size (an increase from 20589 to 20641
> bytes) compared to completely removing this code.
>
> Even for a feature-rich TPL (including DM support as for the RK3368),
> this equates to a size difference of significantly more than 10% in
> TPL binary size.
>
> Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
>
> ---
>
> Changes in v3:
> - change to use function-sections (instead of disabling at the
>   Makefile-level for TPL builds) per Tom's suggestion
>
> Changes in v2: None
>
>  arch/arm/cpu/armv8/Makefile     |  2 +-
>  arch/arm/cpu/armv8/cache.S      | 22 ++++++++++++++++++++++
>  arch/arm/cpu/armv8/tlb.S        |  4 +++-
>  arch/arm/cpu/armv8/transition.S |  6 ++++++
>  4 files changed, 32 insertions(+), 2 deletions(-)
>

Reviewed-by: Simon Glass <sjg@chromium.org>

> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> index c447085..64f35f1 100644
> --- a/arch/arm/cpu/armv8/Makefile
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -10,11 +10,11 @@ extra-y     := start.o
>  obj-y  += cpu.o
>  obj-y  += generic_timer.o
>  obj-y  += cache_v8.o
> -obj-y  += exceptions.o
>  obj-y  += cache.o
>  obj-y  += tlb.o
>  obj-y  += transition.o
>  obj-y  += fwcall.o
> +obj-y  += exceptions.o

But what happened here?

>  obj-y  += cpu-dt.o
>  obj-$(CONFIG_ARM_SMCCC)                += smccc-call.o
diff mbox

Patch

diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
index c447085..64f35f1 100644
--- a/arch/arm/cpu/armv8/Makefile
+++ b/arch/arm/cpu/armv8/Makefile
@@ -10,11 +10,11 @@  extra-y	:= start.o
 obj-y	+= cpu.o
 obj-y	+= generic_timer.o
 obj-y	+= cache_v8.o
-obj-y	+= exceptions.o
 obj-y	+= cache.o
 obj-y	+= tlb.o
 obj-y	+= transition.o
 obj-y	+= fwcall.o
+obj-y	+= exceptions.o
 obj-y	+= cpu-dt.o
 obj-$(CONFIG_ARM_SMCCC)		+= smccc-call.o
 
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
index 7cba308..ea845d1 100644
--- a/arch/arm/cpu/armv8/cache.S
+++ b/arch/arm/cpu/armv8/cache.S
@@ -22,6 +22,7 @@ 
  * x1: 0 clean & invalidate, 1 invalidate only
  * x2~x9: clobbered
  */
+.pushsection .text.__asm_dcache_level, "ax"
 ENTRY(__asm_dcache_level)
 	lsl	x12, x0, #1
 	msr	csselr_el1, x12		/* select cache level */
@@ -58,6 +59,7 @@  loop_way:
 
 	ret
 ENDPROC(__asm_dcache_level)
+.popsection
 
 /*
  * void __asm_flush_dcache_all(int invalidate_only)
@@ -66,6 +68,7 @@  ENDPROC(__asm_dcache_level)
  *
  * flush or invalidate all data cache by SET/WAY.
  */
+.pushsection .text.__asm_dcache_all, "ax"
 ENTRY(__asm_dcache_all)
 	mov	x1, x0
 	dsb	sy
@@ -102,16 +105,21 @@  skip:
 finished:
 	ret
 ENDPROC(__asm_dcache_all)
+.popsection
 
+.pushsection .text.__asm_flush_dcache_all, "ax"
 ENTRY(__asm_flush_dcache_all)
 	mov	x0, #0
 	b	__asm_dcache_all
 ENDPROC(__asm_flush_dcache_all)
+.popsection
 
+.pushsection .text.__asm_invalidate_dcache_all, "ax"
 ENTRY(__asm_invalidate_dcache_all)
 	mov	x0, #0x1
 	b	__asm_dcache_all
 ENDPROC(__asm_invalidate_dcache_all)
+.popsection
 
 /*
  * void __asm_flush_dcache_range(start, end)
@@ -121,6 +129,7 @@  ENDPROC(__asm_invalidate_dcache_all)
  * x0: start address
  * x1: end address
  */
+.pushsection .text.__asm_flush_dcache_range, "ax"
 ENTRY(__asm_flush_dcache_range)
 	mrs	x3, ctr_el0
 	lsr	x3, x3, #16
@@ -138,6 +147,7 @@  ENTRY(__asm_flush_dcache_range)
 	dsb	sy
 	ret
 ENDPROC(__asm_flush_dcache_range)
+.popsection
 /*
  * void __asm_invalidate_dcache_range(start, end)
  *
@@ -146,6 +156,7 @@  ENDPROC(__asm_flush_dcache_range)
  * x0: start address
  * x1: end address
  */
+.pushsection .text.__asm_invalidate_dcache_range, "ax"
 ENTRY(__asm_invalidate_dcache_range)
 	mrs	x3, ctr_el0
 	ubfm	x3, x3, #16, #19
@@ -162,41 +173,51 @@  ENTRY(__asm_invalidate_dcache_range)
 	dsb	sy
 	ret
 ENDPROC(__asm_invalidate_dcache_range)
+.popsection
 
 /*
  * void __asm_invalidate_icache_all(void)
  *
  * invalidate all tlb entries.
  */
+.pushsection .text.__asm_invalidate_icache_all, "ax"
 ENTRY(__asm_invalidate_icache_all)
 	ic	ialluis
 	isb	sy
 	ret
 ENDPROC(__asm_invalidate_icache_all)
+.popsection
 
+.pushsection .text.__asm_invalidate_l3_dcache, "ax"
 ENTRY(__asm_invalidate_l3_dcache)
 	mov	x0, #0			/* return status as success */
 	ret
 ENDPROC(__asm_invalidate_l3_dcache)
 	.weak	__asm_invalidate_l3_dcache
+.popsection
 
+.pushsection .text.__asm_flush_l3_dcache, "ax"
 ENTRY(__asm_flush_l3_dcache)
 	mov	x0, #0			/* return status as success */
 	ret
 ENDPROC(__asm_flush_l3_dcache)
 	.weak	__asm_flush_l3_dcache
+.popsection
 
+.pushsection .text.__asm_invalidate_l3_icache, "ax"
 ENTRY(__asm_invalidate_l3_icache)
 	mov	x0, #0			/* return status as success */
 	ret
 ENDPROC(__asm_invalidate_l3_icache)
 	.weak	__asm_invalidate_l3_icache
+.popsection
 
 /*
  * void __asm_switch_ttbr(ulong new_ttbr)
  *
  * Safely switches to a new page table.
  */
+.pushsection .text.__asm_switch_ttbr, "ax"
 ENTRY(__asm_switch_ttbr)
 	/* x2 = SCTLR (alive throghout the function) */
 	switch_el x4, 3f, 2f, 1f
@@ -244,3 +265,4 @@  ENTRY(__asm_switch_ttbr)
 
 	ret	x3
 ENDPROC(__asm_switch_ttbr)
+.popsection
diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
index 945445b..6743111 100644
--- a/arch/arm/cpu/armv8/tlb.S
+++ b/arch/arm/cpu/armv8/tlb.S
@@ -14,7 +14,8 @@ 
  * void __asm_invalidate_tlb_all(void)
  *
  * invalidate all tlb entries.
- */
+*/
+.pushsection .text.__asm_invalidate_tlb_all, "ax"
 ENTRY(__asm_invalidate_tlb_all)
 	switch_el x9, 3f, 2f, 1f
 3:	tlbi	alle3
@@ -31,3 +32,4 @@  ENTRY(__asm_invalidate_tlb_all)
 0:
 	ret
 ENDPROC(__asm_invalidate_tlb_all)
+.popsection
diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
index ca07465..7aa6935 100644
--- a/arch/arm/cpu/armv8/transition.S
+++ b/arch/arm/cpu/armv8/transition.S
@@ -10,6 +10,7 @@ 
 #include <linux/linkage.h>
 #include <asm/macro.h>
 
+.pushsection .text.armv8_switch_to_el2, "ax"
 ENTRY(armv8_switch_to_el2)
 	switch_el x6, 1f, 0f, 0f
 0:
@@ -30,7 +31,9 @@  ENTRY(armv8_switch_to_el2)
 	br x4
 1:	armv8_switch_to_el2_m x4, x5, x6
 ENDPROC(armv8_switch_to_el2)
+.popsection
 
+.pushsection .text.armv8_switch_to_el1, "ax"
 ENTRY(armv8_switch_to_el1)
 	switch_el x6, 0f, 1f, 0f
 0:
@@ -40,7 +43,10 @@  ENTRY(armv8_switch_to_el1)
 	br x4
 1:	armv8_switch_to_el1_m x4, x5, x6
 ENDPROC(armv8_switch_to_el1)
+.popsection
 
+.pushsection .text.armv8_el2_to_aarch32, "ax"
 WEAK(armv8_el2_to_aarch32)
 	ret
 ENDPROC(armv8_el2_to_aarch32)
+.popsection