diff mbox

[U-Boot,v2,1/2] armv8: Support loading 32-bit OS in AArch32 execution state

Message ID 1463646362-15531-2-git-send-email-b18965@freescale.com
State Superseded
Headers show

Commit Message

Alison Wang May 19, 2016, 8:26 a.m. UTC
To support loading a 32-bit OS, the execution state will change from
AArch64 to AArch32 when jumping to kernel.

The architecture information will be got through checking FIT
image, then U-Boot will load 32-bit OS or 64-bit OS automatically.

Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
Signed-off-by: Alison Wang <alison.wang@nxp.com>
Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
---
Changes in v2:
- armv8_switch_to_el2_aarch32() is removed. armv8_switch_to_el2_m is used
  to switch to AArch64 EL2 or AArch32 Hyp.
- armv8_switch_to_el1_aarch32() is removed. armv8_switch_to_el1_m is used
  to switch to AArch64 EL1 or AArch32 SVC.

 arch/arm/cpu/armv8/transition.S |  8 ++---
 arch/arm/include/asm/macro.h    | 80 +++++++++++++++++++++++++++++++++++++----
 arch/arm/include/asm/system.h   | 25 +++++++++++--
 arch/arm/lib/bootm.c            | 26 +++++++++++---
 common/image-fit.c              | 12 ++++++-
 5 files changed, 133 insertions(+), 18 deletions(-)

Comments

Alexander Graf May 19, 2016, 8:58 a.m. UTC | #1
On 05/19/2016 10:26 AM, Alison Wang wrote:
> To support loading a 32-bit OS, the execution state will change from
> AArch64 to AArch32 when jumping to kernel.
>
> The architecture information will be got through checking FIT
> image, then U-Boot will load 32-bit OS or 64-bit OS automatically.
>
> Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
> Signed-off-by: Alison Wang <alison.wang@nxp.com>
> Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
> ---
> Changes in v2:
> - armv8_switch_to_el2_aarch32() is removed. armv8_switch_to_el2_m is used
>    to switch to AArch64 EL2 or AArch32 Hyp.
> - armv8_switch_to_el1_aarch32() is removed. armv8_switch_to_el1_m is used
>    to switch to AArch64 EL1 or AArch32 SVC.
>
>   arch/arm/cpu/armv8/transition.S |  8 ++---
>   arch/arm/include/asm/macro.h    | 80 +++++++++++++++++++++++++++++++++++++----
>   arch/arm/include/asm/system.h   | 25 +++++++++++--
>   arch/arm/lib/bootm.c            | 26 +++++++++++---
>   common/image-fit.c              | 12 ++++++-
>   5 files changed, 133 insertions(+), 18 deletions(-)
>
> diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
> index 253a39b..9fa3b59 100644
> --- a/arch/arm/cpu/armv8/transition.S
> +++ b/arch/arm/cpu/armv8/transition.S
> @@ -11,13 +11,13 @@
>   #include <asm/macro.h>
>   
>   ENTRY(armv8_switch_to_el2)
> -	switch_el x0, 1f, 0f, 0f
> +	switch_el x4, 1f, 0f, 0f
>   0:	ret
> -1:	armv8_switch_to_el2_m x0
> +1:	armv8_switch_to_el2_m x0, x1, x2, x3, x4, x5, x6
>   ENDPROC(armv8_switch_to_el2)
>   
>   ENTRY(armv8_switch_to_el1)
> -	switch_el x0, 0f, 1f, 0f
> +	switch_el x4, 0f, 1f, 0f
>   0:	ret
> -1:	armv8_switch_to_el1_m x0, x1
> +1:	armv8_switch_to_el1_m x0, x1, x2, x3, x4, x5, x6
>   ENDPROC(armv8_switch_to_el1)
> diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
> index 9bb0efa..56d77f6 100644
> --- a/arch/arm/include/asm/macro.h
> +++ b/arch/arm/include/asm/macro.h
> @@ -8,6 +8,9 @@
>   
>   #ifndef __ASM_ARM_MACRO_H__
>   #define __ASM_ARM_MACRO_H__
> +
> +#include <asm/system.h>
> +
>   #ifdef __ASSEMBLY__
>   
>   /*
> @@ -135,9 +138,20 @@ lr	.req	x30
>   #endif
>   .endm
>   
> -.macro armv8_switch_to_el2_m, xreg1
> -	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1 */
> -	mov	\xreg1, #0x5b1
> +.macro armv8_switch_to_el2_m, xreg1, xreg2, xreg3, xreg4, xreg5, xreg6, xreg7

These arguments should probably get documented :). Also xreg4 doesn't 
seem to actually be a scratch register, but a real parameter. So please 
name it accordingly.

> +	mov	\xreg5, \xreg1
> +	mov	\xreg6, \xreg2
> +	mov	\xreg7, \xreg3
> +
> +	/*
> +	 * If the next lower exception level is AArch64, 64bit EL2 | HCE | SMD |
> +	 * RES1 (Bits[5:4]) | Non-secure EL0/EL1. If the next lower exception
> +	 * level is AArch32, 32bit EL2 | HCE | SMD | RES1 (Bits[5:4]) |
> +	 * Non-secure EL0/EL1.
> +	 */
> +	mov	\xreg1, #0x1b1
> +	lsl	\xreg2, \xreg4, #10
> +	orr	\xreg1, \xreg1, \xreg2

Is there any way you can make this obvious from the code? Basically we 
would want something like

mov \xreg1, #(SCR_EL3_EL2_AA64 | SCR_EL3_HCE | SCR_EL3_SMD ...)

I'm sure there are creative ways to achieve this even with the high bits 
set. Maybe ldr = works?

>   	msr	scr_el3, \xreg1
>   	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
>   	mov	\xreg1, #0x33ff
> @@ -156,18 +170,46 @@ lr	.req	x30
>   	movk	\xreg1, #0x30C5, lsl #16
>   	msr	sctlr_el2, \xreg1
>   
> -	/* Return to the EL2_SP2 mode from EL3 */
>   	mov	\xreg1, sp
>   	msr	sp_el2, \xreg1		/* Migrate SP */
>   	mrs	\xreg1, vbar_el3
>   	msr	vbar_el2, \xreg1	/* Migrate VBAR */
> +
> +	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
> +	ldr	\xreg1, =ES_TO_AARCH64
> +	cmp	\xreg4, \xreg1
> +	b.eq	1f
> +	b.ne	2f

Just b.ne should be enough, no?

> +
> +1:
> +	/* Return to the EL2_SP2 mode from EL3 */
>   	mov	\xreg1, #0x3c9

Magic values again :). Please convert them to constants getting ORed.

>   	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
>   	msr	elr_el3, lr
> +
> +	mov	\xreg1, \xreg5
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
> +	eret
> +
> +2:
> +	/* Return to AArch32 Hypervisor mode */
> +	mov     \xreg1, #0x1da
> +	msr	spsr_el3, \xreg1	/* Hyp | D | A | I | F */
> +	msr     elr_el3, \xreg5
> +
> +	mov	\xreg1, #0
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
>   	eret
> +
>   .endm
>   
> -.macro armv8_switch_to_el1_m, xreg1, xreg2
> +.macro armv8_switch_to_el1_m, xreg1, xreg2, xreg3, xreg4, xreg5, xreg6, xreg7
> +	mov	\xreg5, \xreg1
> +	mov	\xreg6, \xreg2
> +	mov	\xreg7, \xreg3
> +
>   	/* Initialize Generic Timers */
>   	mrs	\xreg1, cnthctl_el2
>   	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
> @@ -188,7 +230,7 @@ lr	.req	x30
>   	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
>   
>   	/* Initialize HCR_EL2 */
> -	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
> +	lsl	\xreg1, \xreg4, #31

This deserves a comment. Imagine you read this code again in 5 years. 
Would you still understand what's going on?

>   	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
>   	msr	hcr_el2, \xreg1
>   
> @@ -203,15 +245,39 @@ lr	.req	x30
>   	movk	\xreg1, #0x30d0, lsl #16
>   	msr	sctlr_el1, \xreg1
>   
> -	/* Return to the EL1_SP1 mode from EL2 */
>   	mov	\xreg1, sp
>   	msr	sp_el1, \xreg1		/* Migrate SP */
>   	mrs	\xreg1, vbar_el2
>   	msr	vbar_el1, \xreg1	/* Migrate VBAR */
> +
> +	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
> +	ldr	\xreg1, =ES_TO_AARCH64
> +	cmp	\xreg4, \xreg1
> +	b.eq	1f
> +	b.ne	2f
> +
> +1:
> +	/* Return to the EL1_SP1 mode from EL2 */
>   	mov	\xreg1, #0x3c5
>   	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
>   	msr	elr_el2, lr
> +
> +	mov	\xreg1, \xreg5
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
> +	eret
> +
> +2:
> +	/* Return to the Supervisor mode from EL2 */
> +	mov     \xreg1, #0x1d3
> +	msr     spsr_el2, \xreg1        /* Supervisor | D | A | I | F */
> +	msr     elr_el2, \xreg5
> +
> +	mov	\xreg1, #0
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
>   	eret
> +
>   .endm
>   
>   #if defined(CONFIG_GICV3)
> diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> index 9ae890a..fcfe3ae 100644
> --- a/arch/arm/include/asm/system.h
> +++ b/arch/arm/include/asm/system.h
> @@ -17,6 +17,9 @@
>   #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
>   #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
>   
> +#define ES_TO_AARCH64			1
> +#define ES_TO_AARCH32			0
> +
>   #ifndef __ASSEMBLY__
>   
>   u64 get_page_table_size(void);
> @@ -100,8 +103,26 @@ void __asm_invalidate_icache_all(void);
>   int __asm_flush_l3_cache(void);
>   void __asm_switch_ttbr(u64 new_ttbr);
>   
> -void armv8_switch_to_el2(void);
> -void armv8_switch_to_el1(void);
> +/*
> + * Switch from EL3 to EL2 for ARMv8
> + *
> + * @entry_point: kernel entry point
> + * @mach_nr:     machine nr
> + * @fdt_addr:    fdt address
> + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> + */
> +void armv8_switch_to_el2(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> +			 u64 es_flag);
> +/*
> + * Switch from EL2 to EL1 for ARMv8
> + *
> + * @entry_point: kernel entry point
> + * @mach_nr:     machine nr
> + * @fdt_addr:    fdt address
> + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> + */
> +void armv8_switch_to_el1(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> +			 u64 es_flag);
>   void gic_init(void);
>   void gic_send_sgi(unsigned long sgino);
>   void wait_for_wakeup(void);
> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
> index 0838d89..f6f685a 100644
> --- a/arch/arm/lib/bootm.c
> +++ b/arch/arm/lib/bootm.c
> @@ -193,9 +193,9 @@ static void do_nonsec_virt_switch(void)
>   {
>   	smp_kick_all_cpus();
>   	dcache_disable();	/* flush cache before swtiching to EL2 */
> -	armv8_switch_to_el2();
> +	armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);

Just rename the existing function to armv8_switch_to_el2_adv() or so and 
provide armv8_switch_to_el2() as static inline function that simply 
invokes armv8_switch_to_el2_adv(0, 0, 0, ES_TO_AARCH64);

>   #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> -	armv8_switch_to_el1();
> +	armv8_switch_to_el1(0, 0, 0, ES_TO_AARCH64);
>   #endif
>   }
>   #endif
> @@ -286,8 +286,26 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
>   	announce_and_cleanup(fake);
>   
>   	if (!fake) {
> -		do_nonsec_virt_switch();
> -		kernel_entry(images->ft_addr, NULL, NULL, NULL);
> +		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
> +		    (images->os.arch == IH_ARCH_ARM)) {
> +			smp_kick_all_cpus();
> +			dcache_disable();

This open codes part of do_nonsec_virt_switch().

Could we just clean up all of that mess a bit more maybe? How about you call

do_nonsec_virt_switch(ES_EL2 | ES_AARCH32, entry, images->ft_addr) or
do_nonsec_virt_switch(ES_EL1 | ES_AARCH64, entry, images->ft_addr)

Then the if() would only need to set ES_AARCH32/64 in a variable and 
everything else goes along the same code.

> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> +			armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> +			armv8_switch_to_el1((u64)images->ep,
> +					    (u64)gd->bd->bi_arch_number,
> +					    (u64)images->ft_addr,
> +					    ES_TO_AARCH32);
> +#else
> +			armv8_switch_to_el2((u64)images->ep,
> +					    (u64)gd->bd->bi_arch_number,
> +					    (u64)images->ft_addr,
> +					    ES_TO_AARCH32);
> +#endif
> +		} else {
> +			do_nonsec_virt_switch();
> +			kernel_entry(images->ft_addr, NULL, NULL, NULL);
> +		}
>   	}
>   #else
>   	unsigned long machid = gd->bd->bi_arch_number;
> diff --git a/common/image-fit.c b/common/image-fit.c
> index 25f8a11..2986469 100644
> --- a/common/image-fit.c
> +++ b/common/image-fit.c
> @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit, int noffset, uint8_t arch)
>   	if (fit_image_get_arch(fit, noffset, &image_arch))
>   		return 0;
>   	return (arch == image_arch) ||
> -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
> +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
> +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);

This should be a Kconfig option. ThunderX doesn't support AArch32 execution.

>   }
>   
>   /**
> @@ -1586,6 +1587,9 @@ int fit_image_load(bootm_headers_t *images, ulong addr,
>   	int type_ok, os_ok;
>   	ulong load, data, len;
>   	uint8_t os;
> +#ifndef USE_HOSTCC
> +	uint8_t os_arch;
> +#endif
>   	const char *prop_name;
>   	int ret;
>   
> @@ -1669,6 +1673,12 @@ int fit_image_load(bootm_headers_t *images, ulong addr,
>   		return -ENOEXEC;
>   	}
>   #endif
> +
> +#ifndef USE_HOSTCC
> +	fit_image_get_arch(fit, noffset, &os_arch);
> +	images->os.arch = os_arch;
> +#endif
> +
>   	if (image_type == IH_TYPE_FLATDT &&
>   	    !fit_image_check_comp(fit, noffset, IH_COMP_NONE)) {
>   		puts("FDT image is compressed");
Alison Wang May 20, 2016, 6:53 a.m. UTC | #2
> On 05/19/2016 10:26 AM, Alison Wang wrote:
> > To support loading a 32-bit OS, the execution state will change from
> > AArch64 to AArch32 when jumping to kernel.
> >
> > The architecture information will be got through checking FIT image,
> > then U-Boot will load 32-bit OS or 64-bit OS automatically.
> >
> > Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
> > Signed-off-by: Alison Wang <alison.wang@nxp.com>
> > Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
> > ---
> > Changes in v2:
> > - armv8_switch_to_el2_aarch32() is removed. armv8_switch_to_el2_m is
> used
> >    to switch to AArch64 EL2 or AArch32 Hyp.
> > - armv8_switch_to_el1_aarch32() is removed. armv8_switch_to_el1_m is
> used
> >    to switch to AArch64 EL1 or AArch32 SVC.
> >
> >   arch/arm/cpu/armv8/transition.S |  8 ++---
> >   arch/arm/include/asm/macro.h    | 80
> +++++++++++++++++++++++++++++++++++++----
> >   arch/arm/include/asm/system.h   | 25 +++++++++++--
> >   arch/arm/lib/bootm.c            | 26 +++++++++++---
> >   common/image-fit.c              | 12 ++++++-
> >   5 files changed, 133 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/arm/cpu/armv8/transition.S
> > b/arch/arm/cpu/armv8/transition.S index 253a39b..9fa3b59 100644
> > --- a/arch/arm/cpu/armv8/transition.S
> > +++ b/arch/arm/cpu/armv8/transition.S
> > @@ -11,13 +11,13 @@
> >   #include <asm/macro.h>
> >
> >   ENTRY(armv8_switch_to_el2)
> > -	switch_el x0, 1f, 0f, 0f
> > +	switch_el x4, 1f, 0f, 0f
> >   0:	ret
> > -1:	armv8_switch_to_el2_m x0
> > +1:	armv8_switch_to_el2_m x0, x1, x2, x3, x4, x5, x6
> >   ENDPROC(armv8_switch_to_el2)
> >
> >   ENTRY(armv8_switch_to_el1)
> > -	switch_el x0, 0f, 1f, 0f
> > +	switch_el x4, 0f, 1f, 0f
> >   0:	ret
> > -1:	armv8_switch_to_el1_m x0, x1
> > +1:	armv8_switch_to_el1_m x0, x1, x2, x3, x4, x5, x6
> >   ENDPROC(armv8_switch_to_el1)
> > diff --git a/arch/arm/include/asm/macro.h
> > b/arch/arm/include/asm/macro.h index 9bb0efa..56d77f6 100644
> > --- a/arch/arm/include/asm/macro.h
> > +++ b/arch/arm/include/asm/macro.h
> > @@ -8,6 +8,9 @@
> >
> >   #ifndef __ASM_ARM_MACRO_H__
> >   #define __ASM_ARM_MACRO_H__
> > +
> > +#include <asm/system.h>
> > +
> >   #ifdef __ASSEMBLY__
> >
> >   /*
> > @@ -135,9 +138,20 @@ lr	.req	x30
> >   #endif
> >   .endm
> >
> > -.macro armv8_switch_to_el2_m, xreg1
> > -	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1
> */
> > -	mov	\xreg1, #0x5b1
> > +.macro armv8_switch_to_el2_m, xreg1, xreg2, xreg3, xreg4, xreg5,
> > +xreg6, xreg7
> 
> These arguments should probably get documented :). Also xreg4 doesn't
> seem to actually be a scratch register, but a real parameter. So please
> name it accordingly.
[Alison Wang] Ok, I will add the descriptions for these arguments. Not only
xreg4, xreg1, xreg2 and xreg3 are real parameters too. I will rename them in
the next version.
> 
> > +	mov	\xreg5, \xreg1
> > +	mov	\xreg6, \xreg2
> > +	mov	\xreg7, \xreg3
> > +
> > +	/*
> > +	 * If the next lower exception level is AArch64, 64bit EL2 | HCE |
> SMD |
> > +	 * RES1 (Bits[5:4]) | Non-secure EL0/EL1. If the next lower
> exception
> > +	 * level is AArch32, 32bit EL2 | HCE | SMD | RES1 (Bits[5:4]) |
> > +	 * Non-secure EL0/EL1.
> > +	 */
> > +	mov	\xreg1, #0x1b1
> > +	lsl	\xreg2, \xreg4, #10
> > +	orr	\xreg1, \xreg1, \xreg2
> 
> Is there any way you can make this obvious from the code? Basically we
> would want something like
> 
> mov \xreg1, #(SCR_EL3_EL2_AA64 | SCR_EL3_HCE | SCR_EL3_SMD ...)
> 
> I'm sure there are creative ways to achieve this even with the high bits
> set. Maybe ldr = works?
[Alison Wang] Yes, I will change them and make them obvious.
> 
> >   	msr	scr_el3, \xreg1
> >   	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3
> */
> >   	mov	\xreg1, #0x33ff
> > @@ -156,18 +170,46 @@ lr	.req	x30
> >   	movk	\xreg1, #0x30C5, lsl #16
> >   	msr	sctlr_el2, \xreg1
> >
> > -	/* Return to the EL2_SP2 mode from EL3 */
> >   	mov	\xreg1, sp
> >   	msr	sp_el2, \xreg1		/* Migrate SP */
> >   	mrs	\xreg1, vbar_el3
> >   	msr	vbar_el2, \xreg1	/* Migrate VBAR */
> > +
> > +	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
> > +	ldr	\xreg1, =ES_TO_AARCH64
> > +	cmp	\xreg4, \xreg1
> > +	b.eq	1f
> > +	b.ne	2f
> 
> Just b.ne should be enough, no?
[Alison Wang] Yes, you are right.
> 
> > +
> > +1:
> > +	/* Return to the EL2_SP2 mode from EL3 */
> >   	mov	\xreg1, #0x3c9
> 
> Magic values again :). Please convert them to constants getting ORed.
[Alison Wang] Ok, I will change the original magic values too.
> 
> >   	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
> >   	msr	elr_el3, lr
> > +
> > +	mov	\xreg1, \xreg5
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> > +	eret
> > +
> > +2:
> > +	/* Return to AArch32 Hypervisor mode */
> > +	mov     \xreg1, #0x1da
> > +	msr	spsr_el3, \xreg1	/* Hyp | D | A | I | F */
> > +	msr     elr_el3, \xreg5
> > +
> > +	mov	\xreg1, #0
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> >   	eret
> > +
> >   .endm
> >
> > -.macro armv8_switch_to_el1_m, xreg1, xreg2
> > +.macro armv8_switch_to_el1_m, xreg1, xreg2, xreg3, xreg4, xreg5,
> xreg6, xreg7
> > +	mov	\xreg5, \xreg1
> > +	mov	\xreg6, \xreg2
> > +	mov	\xreg7, \xreg3
> > +
> >   	/* Initialize Generic Timers */
> >   	mrs	\xreg1, cnthctl_el2
> >   	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
> > @@ -188,7 +230,7 @@ lr	.req	x30
> >   	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
> >
> >   	/* Initialize HCR_EL2 */
> > -	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
> > +	lsl	\xreg1, \xreg4, #31
> 
> This deserves a comment. Imagine you read this code again in 5 years.
> Would you still understand what's going on?
[Alison Wang] Ok, I will add a comment.
> 
> >   	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
> >   	msr	hcr_el2, \xreg1
> >
> > @@ -203,15 +245,39 @@ lr	.req	x30
> >   	movk	\xreg1, #0x30d0, lsl #16
> >   	msr	sctlr_el1, \xreg1
> >
> > -	/* Return to the EL1_SP1 mode from EL2 */
> >   	mov	\xreg1, sp
> >   	msr	sp_el1, \xreg1		/* Migrate SP */
> >   	mrs	\xreg1, vbar_el2
> >   	msr	vbar_el1, \xreg1	/* Migrate VBAR */
> > +
> > +	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
> > +	ldr	\xreg1, =ES_TO_AARCH64
> > +	cmp	\xreg4, \xreg1
> > +	b.eq	1f
> > +	b.ne	2f
> > +
> > +1:
> > +	/* Return to the EL1_SP1 mode from EL2 */
> >   	mov	\xreg1, #0x3c5
> >   	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
> >   	msr	elr_el2, lr
> > +
> > +	mov	\xreg1, \xreg5
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> > +	eret
> > +
> > +2:
> > +	/* Return to the Supervisor mode from EL2 */
> > +	mov     \xreg1, #0x1d3
> > +	msr     spsr_el2, \xreg1        /* Supervisor | D | A | I | F */
> > +	msr     elr_el2, \xreg5
> > +
> > +	mov	\xreg1, #0
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> >   	eret
> > +
> >   .endm
> >
> >   #if defined(CONFIG_GICV3)
> > diff --git a/arch/arm/include/asm/system.h
> > b/arch/arm/include/asm/system.h index 9ae890a..fcfe3ae 100644
> > --- a/arch/arm/include/asm/system.h
> > +++ b/arch/arm/include/asm/system.h
> > @@ -17,6 +17,9 @@
> >   #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
> >   #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
> >
> > +#define ES_TO_AARCH64			1
> > +#define ES_TO_AARCH32			0
> > +
> >   #ifndef __ASSEMBLY__
> >
> >   u64 get_page_table_size(void);
> > @@ -100,8 +103,26 @@ void __asm_invalidate_icache_all(void);
> >   int __asm_flush_l3_cache(void);
> >   void __asm_switch_ttbr(u64 new_ttbr);
> >
> > -void armv8_switch_to_el2(void);
> > -void armv8_switch_to_el1(void);
> > +/*
> > + * Switch from EL3 to EL2 for ARMv8
> > + *
> > + * @entry_point: kernel entry point
> > + * @mach_nr:     machine nr
> > + * @fdt_addr:    fdt address
> > + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> > + */
> > +void armv8_switch_to_el2(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> > +			 u64 es_flag);
> > +/*
> > + * Switch from EL2 to EL1 for ARMv8
> > + *
> > + * @entry_point: kernel entry point
> > + * @mach_nr:     machine nr
> > + * @fdt_addr:    fdt address
> > + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> > + */
> > +void armv8_switch_to_el1(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> > +			 u64 es_flag);
> >   void gic_init(void);
> >   void gic_send_sgi(unsigned long sgino);
> >   void wait_for_wakeup(void);
> > diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c index
> > 0838d89..f6f685a 100644
> > --- a/arch/arm/lib/bootm.c
> > +++ b/arch/arm/lib/bootm.c
> > @@ -193,9 +193,9 @@ static void do_nonsec_virt_switch(void)
> >   {
> >   	smp_kick_all_cpus();
> >   	dcache_disable();	/* flush cache before swtiching to EL2 */
> > -	armv8_switch_to_el2();
> > +	armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> 
> Just rename the existing function to armv8_switch_to_el2_adv() or so and
> provide armv8_switch_to_el2() as static inline function that simply
> invokes armv8_switch_to_el2_adv(0, 0, 0, ES_TO_AARCH64);
[Alison Wang] I didn't know why you suggest to do so. Please explain it.
Thanks.
> 
> >   #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> > -	armv8_switch_to_el1();
> > +	armv8_switch_to_el1(0, 0, 0, ES_TO_AARCH64);
> >   #endif
> >   }
> >   #endif
> > @@ -286,8 +286,26 @@ static void boot_jump_linux(bootm_headers_t
> *images, int flag)
> >   	announce_and_cleanup(fake);
> >
> >   	if (!fake) {
> > -		do_nonsec_virt_switch();
> > -		kernel_entry(images->ft_addr, NULL, NULL, NULL);
> > +		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
> > +		    (images->os.arch == IH_ARCH_ARM)) {
> > +			smp_kick_all_cpus();
> > +			dcache_disable();
> 
> This open codes part of do_nonsec_virt_switch().
> 
> Could we just clean up all of that mess a bit more maybe? How about you
> call
> 
> do_nonsec_virt_switch(ES_EL2 | ES_AARCH32, entry, images->ft_addr) or
> do_nonsec_virt_switch(ES_EL1 | ES_AARCH64, entry, images->ft_addr)
> 
> Then the if() would only need to set ES_AARCH32/64 in a variable and
> everything else goes along the same code.
[Alison Wang] It sounds fine. I will try to do it in the next version.
> 
> > +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> > +			armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> > +			armv8_switch_to_el1((u64)images->ep,
> > +					    (u64)gd->bd->bi_arch_number,
> > +					    (u64)images->ft_addr,
> > +					    ES_TO_AARCH32);
> > +#else
> > +			armv8_switch_to_el2((u64)images->ep,
> > +					    (u64)gd->bd->bi_arch_number,
> > +					    (u64)images->ft_addr,
> > +					    ES_TO_AARCH32);
> > +#endif
> > +		} else {
> > +			do_nonsec_virt_switch();
> > +			kernel_entry(images->ft_addr, NULL, NULL, NULL);
> > +		}
> >   	}
> >   #else
> >   	unsigned long machid = gd->bd->bi_arch_number; diff --git
> > a/common/image-fit.c b/common/image-fit.c index 25f8a11..2986469
> > 100644
> > --- a/common/image-fit.c
> > +++ b/common/image-fit.c
> > @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit, int
> noffset, uint8_t arch)
> >   	if (fit_image_get_arch(fit, noffset, &image_arch))
> >   		return 0;
> >   	return (arch == image_arch) ||
> > -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
> > +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
> > +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);
> 
> This should be a Kconfig option. ThunderX doesn't support AArch32
> execution.
[Alison Wang] I can't understand your meaning. Can you clarify it?
> 
> >   }
> >
> >   /**
> > @@ -1586,6 +1587,9 @@ int fit_image_load(bootm_headers_t *images,
> ulong addr,
> >   	int type_ok, os_ok;
> >   	ulong load, data, len;
> >   	uint8_t os;
> > +#ifndef USE_HOSTCC
> > +	uint8_t os_arch;
> > +#endif
> >   	const char *prop_name;
> >   	int ret;
> >
> > @@ -1669,6 +1673,12 @@ int fit_image_load(bootm_headers_t *images,
> ulong addr,
> >   		return -ENOEXEC;
> >   	}
> >   #endif
> > +
> > +#ifndef USE_HOSTCC
> > +	fit_image_get_arch(fit, noffset, &os_arch);
> > +	images->os.arch = os_arch;
> > +#endif
> > +
> >   	if (image_type == IH_TYPE_FLATDT &&
> >   	    !fit_image_check_comp(fit, noffset, IH_COMP_NONE)) {
> >   		puts("FDT image is compressed");

Thanks a lot for your review.


Best Regards,
Alison Wang
Alexander Graf May 20, 2016, 8:09 a.m. UTC | #3
On 20.05.16 08:53, Huan Wang wrote:
>> On 05/19/2016 10:26 AM, Alison Wang wrote:
>>> To support loading a 32-bit OS, the execution state will change from
>>> AArch64 to AArch32 when jumping to kernel.
>>>
>>> The architecture information will be got through checking FIT image,
>>> then U-Boot will load 32-bit OS or 64-bit OS automatically.
>>>
>>> Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
>>> Signed-off-by: Alison Wang <alison.wang@nxp.com>
>>> Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
>>> ---
>>> Changes in v2:
>>> - armv8_switch_to_el2_aarch32() is removed. armv8_switch_to_el2_m is
>> used
>>>    to switch to AArch64 EL2 or AArch32 Hyp.
>>> - armv8_switch_to_el1_aarch32() is removed. armv8_switch_to_el1_m is
>> used
>>>    to switch to AArch64 EL1 or AArch32 SVC.
>>>
>>>   arch/arm/cpu/armv8/transition.S |  8 ++---
>>>   arch/arm/include/asm/macro.h    | 80
>> +++++++++++++++++++++++++++++++++++++----
>>>   arch/arm/include/asm/system.h   | 25 +++++++++++--
>>>   arch/arm/lib/bootm.c            | 26 +++++++++++---
>>>   common/image-fit.c              | 12 ++++++-
>>>   5 files changed, 133 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/arch/arm/cpu/armv8/transition.S
>>> b/arch/arm/cpu/armv8/transition.S index 253a39b..9fa3b59 100644
>>> --- a/arch/arm/cpu/armv8/transition.S
>>> +++ b/arch/arm/cpu/armv8/transition.S
>>> @@ -11,13 +11,13 @@
>>>   #include <asm/macro.h>
>>>
>>>   ENTRY(armv8_switch_to_el2)
>>> -	switch_el x0, 1f, 0f, 0f
>>> +	switch_el x4, 1f, 0f, 0f
>>>   0:	ret
>>> -1:	armv8_switch_to_el2_m x0
>>> +1:	armv8_switch_to_el2_m x0, x1, x2, x3, x4, x5, x6
>>>   ENDPROC(armv8_switch_to_el2)
>>>
>>>   ENTRY(armv8_switch_to_el1)
>>> -	switch_el x0, 0f, 1f, 0f
>>> +	switch_el x4, 0f, 1f, 0f
>>>   0:	ret
>>> -1:	armv8_switch_to_el1_m x0, x1
>>> +1:	armv8_switch_to_el1_m x0, x1, x2, x3, x4, x5, x6
>>>   ENDPROC(armv8_switch_to_el1)
>>> diff --git a/arch/arm/include/asm/macro.h
>>> b/arch/arm/include/asm/macro.h index 9bb0efa..56d77f6 100644
>>> --- a/arch/arm/include/asm/macro.h
>>> +++ b/arch/arm/include/asm/macro.h
>>> @@ -8,6 +8,9 @@
>>>
>>>   #ifndef __ASM_ARM_MACRO_H__
>>>   #define __ASM_ARM_MACRO_H__
>>> +
>>> +#include <asm/system.h>
>>> +
>>>   #ifdef __ASSEMBLY__
>>>
>>>   /*
>>> @@ -135,9 +138,20 @@ lr	.req	x30
>>>   #endif
>>>   .endm
>>>
>>> -.macro armv8_switch_to_el2_m, xreg1
>>> -	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1
>> */
>>> -	mov	\xreg1, #0x5b1
>>> +.macro armv8_switch_to_el2_m, xreg1, xreg2, xreg3, xreg4, xreg5,
>>> +xreg6, xreg7
>>
>> These arguments should probably get documented :). Also xreg4 doesn't
>> seem to actually be a scratch register, but a real parameter. So please
>> name it accordingly.
> [Alison Wang] Ok, I will add the descriptions for these arguments. Not only
> xreg4, xreg1, xreg2 and xreg3 are real parameters too. I will rename them in
> the next version.
>>
>>> +	mov	\xreg5, \xreg1
>>> +	mov	\xreg6, \xreg2
>>> +	mov	\xreg7, \xreg3
>>> +
>>> +	/*
>>> +	 * If the next lower exception level is AArch64, 64bit EL2 | HCE |
>> SMD |
>>> +	 * RES1 (Bits[5:4]) | Non-secure EL0/EL1. If the next lower
>> exception
>>> +	 * level is AArch32, 32bit EL2 | HCE | SMD | RES1 (Bits[5:4]) |
>>> +	 * Non-secure EL0/EL1.
>>> +	 */
>>> +	mov	\xreg1, #0x1b1
>>> +	lsl	\xreg2, \xreg4, #10
>>> +	orr	\xreg1, \xreg1, \xreg2
>>
>> Is there any way you can make this obvious from the code? Basically we
>> would want something like
>>
>> mov \xreg1, #(SCR_EL3_EL2_AA64 | SCR_EL3_HCE | SCR_EL3_SMD ...)
>>
>> I'm sure there are creative ways to achieve this even with the high bits
>> set. Maybe ldr = works?
> [Alison Wang] Yes, I will change them and make them obvious.
>>
>>>   	msr	scr_el3, \xreg1
>>>   	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3
>> */
>>>   	mov	\xreg1, #0x33ff
>>> @@ -156,18 +170,46 @@ lr	.req	x30
>>>   	movk	\xreg1, #0x30C5, lsl #16
>>>   	msr	sctlr_el2, \xreg1
>>>
>>> -	/* Return to the EL2_SP2 mode from EL3 */
>>>   	mov	\xreg1, sp
>>>   	msr	sp_el2, \xreg1		/* Migrate SP */
>>>   	mrs	\xreg1, vbar_el3
>>>   	msr	vbar_el2, \xreg1	/* Migrate VBAR */
>>> +
>>> +	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
>>> +	ldr	\xreg1, =ES_TO_AARCH64
>>> +	cmp	\xreg4, \xreg1
>>> +	b.eq	1f
>>> +	b.ne	2f
>>
>> Just b.ne should be enough, no?
> [Alison Wang] Yes, you are right.
>>
>>> +
>>> +1:
>>> +	/* Return to the EL2_SP2 mode from EL3 */
>>>   	mov	\xreg1, #0x3c9
>>
>> Magic values again :). Please convert them to constants getting ORed.
> [Alison Wang] Ok, I will change the original magic values too.
>>
>>>   	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
>>>   	msr	elr_el3, lr
>>> +
>>> +	mov	\xreg1, \xreg5
>>> +	mov	\xreg2, \xreg6
>>> +	mov	\xreg3, \xreg7
>>> +	eret
>>> +
>>> +2:
>>> +	/* Return to AArch32 Hypervisor mode */
>>> +	mov     \xreg1, #0x1da
>>> +	msr	spsr_el3, \xreg1	/* Hyp | D | A | I | F */
>>> +	msr     elr_el3, \xreg5
>>> +
>>> +	mov	\xreg1, #0
>>> +	mov	\xreg2, \xreg6
>>> +	mov	\xreg3, \xreg7
>>>   	eret
>>> +
>>>   .endm
>>>
>>> -.macro armv8_switch_to_el1_m, xreg1, xreg2
>>> +.macro armv8_switch_to_el1_m, xreg1, xreg2, xreg3, xreg4, xreg5,
>> xreg6, xreg7
>>> +	mov	\xreg5, \xreg1
>>> +	mov	\xreg6, \xreg2
>>> +	mov	\xreg7, \xreg3
>>> +
>>>   	/* Initialize Generic Timers */
>>>   	mrs	\xreg1, cnthctl_el2
>>>   	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
>>> @@ -188,7 +230,7 @@ lr	.req	x30
>>>   	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
>>>
>>>   	/* Initialize HCR_EL2 */
>>> -	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
>>> +	lsl	\xreg1, \xreg4, #31
>>
>> This deserves a comment. Imagine you read this code again in 5 years.
>> Would you still understand what's going on?
> [Alison Wang] Ok, I will add a comment.
>>
>>>   	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
>>>   	msr	hcr_el2, \xreg1
>>>
>>> @@ -203,15 +245,39 @@ lr	.req	x30
>>>   	movk	\xreg1, #0x30d0, lsl #16
>>>   	msr	sctlr_el1, \xreg1
>>>
>>> -	/* Return to the EL1_SP1 mode from EL2 */
>>>   	mov	\xreg1, sp
>>>   	msr	sp_el1, \xreg1		/* Migrate SP */
>>>   	mrs	\xreg1, vbar_el2
>>>   	msr	vbar_el1, \xreg1	/* Migrate VBAR */
>>> +
>>> +	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
>>> +	ldr	\xreg1, =ES_TO_AARCH64
>>> +	cmp	\xreg4, \xreg1
>>> +	b.eq	1f
>>> +	b.ne	2f
>>> +
>>> +1:
>>> +	/* Return to the EL1_SP1 mode from EL2 */
>>>   	mov	\xreg1, #0x3c5
>>>   	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
>>>   	msr	elr_el2, lr
>>> +
>>> +	mov	\xreg1, \xreg5
>>> +	mov	\xreg2, \xreg6
>>> +	mov	\xreg3, \xreg7
>>> +	eret
>>> +
>>> +2:
>>> +	/* Return to the Supervisor mode from EL2 */
>>> +	mov     \xreg1, #0x1d3
>>> +	msr     spsr_el2, \xreg1        /* Supervisor | D | A | I | F */
>>> +	msr     elr_el2, \xreg5
>>> +
>>> +	mov	\xreg1, #0
>>> +	mov	\xreg2, \xreg6
>>> +	mov	\xreg3, \xreg7
>>>   	eret
>>> +
>>>   .endm
>>>
>>>   #if defined(CONFIG_GICV3)
>>> diff --git a/arch/arm/include/asm/system.h
>>> b/arch/arm/include/asm/system.h index 9ae890a..fcfe3ae 100644
>>> --- a/arch/arm/include/asm/system.h
>>> +++ b/arch/arm/include/asm/system.h
>>> @@ -17,6 +17,9 @@
>>>   #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
>>>   #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
>>>
>>> +#define ES_TO_AARCH64			1
>>> +#define ES_TO_AARCH32			0
>>> +
>>>   #ifndef __ASSEMBLY__
>>>
>>>   u64 get_page_table_size(void);
>>> @@ -100,8 +103,26 @@ void __asm_invalidate_icache_all(void);
>>>   int __asm_flush_l3_cache(void);
>>>   void __asm_switch_ttbr(u64 new_ttbr);
>>>
>>> -void armv8_switch_to_el2(void);
>>> -void armv8_switch_to_el1(void);
>>> +/*
>>> + * Switch from EL3 to EL2 for ARMv8
>>> + *
>>> + * @entry_point: kernel entry point
>>> + * @mach_nr:     machine nr
>>> + * @fdt_addr:    fdt address
>>> + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
>>> + */
>>> +void armv8_switch_to_el2(u64 entry_point, u64 mach_nr, u64 fdt_addr,
>>> +			 u64 es_flag);
>>> +/*
>>> + * Switch from EL2 to EL1 for ARMv8
>>> + *
>>> + * @entry_point: kernel entry point
>>> + * @mach_nr:     machine nr
>>> + * @fdt_addr:    fdt address
>>> + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
>>> + */
>>> +void armv8_switch_to_el1(u64 entry_point, u64 mach_nr, u64 fdt_addr,
>>> +			 u64 es_flag);
>>>   void gic_init(void);
>>>   void gic_send_sgi(unsigned long sgino);
>>>   void wait_for_wakeup(void);
>>> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c index
>>> 0838d89..f6f685a 100644
>>> --- a/arch/arm/lib/bootm.c
>>> +++ b/arch/arm/lib/bootm.c
>>> @@ -193,9 +193,9 @@ static void do_nonsec_virt_switch(void)
>>>   {
>>>   	smp_kick_all_cpus();
>>>   	dcache_disable();	/* flush cache before swtiching to EL2 */
>>> -	armv8_switch_to_el2();
>>> +	armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
>>
>> Just rename the existing function to armv8_switch_to_el2_adv() or so and
>> provide armv8_switch_to_el2() as static inline function that simply
>> invokes armv8_switch_to_el2_adv(0, 0, 0, ES_TO_AARCH64);
> [Alison Wang] I didn't know why you suggest to do so. Please explain it.
> Thanks.

I guess with my suggestion below this wouldn't make much sense anymore,
so it's fine the way it is.

>>
>>>   #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
>>> -	armv8_switch_to_el1();
>>> +	armv8_switch_to_el1(0, 0, 0, ES_TO_AARCH64);
>>>   #endif
>>>   }
>>>   #endif
>>> @@ -286,8 +286,26 @@ static void boot_jump_linux(bootm_headers_t
>> *images, int flag)
>>>   	announce_and_cleanup(fake);
>>>
>>>   	if (!fake) {
>>> -		do_nonsec_virt_switch();
>>> -		kernel_entry(images->ft_addr, NULL, NULL, NULL);
>>> +		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
>>> +		    (images->os.arch == IH_ARCH_ARM)) {
>>> +			smp_kick_all_cpus();
>>> +			dcache_disable();
>>
>> This open codes part of do_nonsec_virt_switch().
>>
>> Could we just clean up all of that mess a bit more maybe? How about you
>> call
>>
>> do_nonsec_virt_switch(ES_EL2 | ES_AARCH32, entry, images->ft_addr) or
>> do_nonsec_virt_switch(ES_EL1 | ES_AARCH64, entry, images->ft_addr)
>>
>> Then the if() would only need to set ES_AARCH32/64 in a variable and
>> everything else goes along the same code.
> [Alison Wang] It sounds fine. I will try to do it in the next version.
>>
>>> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
>>> +			armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
>>> +			armv8_switch_to_el1((u64)images->ep,
>>> +					    (u64)gd->bd->bi_arch_number,
>>> +					    (u64)images->ft_addr,
>>> +					    ES_TO_AARCH32);
>>> +#else
>>> +			armv8_switch_to_el2((u64)images->ep,
>>> +					    (u64)gd->bd->bi_arch_number,
>>> +					    (u64)images->ft_addr,
>>> +					    ES_TO_AARCH32);
>>> +#endif
>>> +		} else {
>>> +			do_nonsec_virt_switch();
>>> +			kernel_entry(images->ft_addr, NULL, NULL, NULL);
>>> +		}
>>>   	}
>>>   #else
>>>   	unsigned long machid = gd->bd->bi_arch_number; diff --git
>>> a/common/image-fit.c b/common/image-fit.c index 25f8a11..2986469
>>> 100644
>>> --- a/common/image-fit.c
>>> +++ b/common/image-fit.c
>>> @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit, int
>> noffset, uint8_t arch)
>>>   	if (fit_image_get_arch(fit, noffset, &image_arch))
>>>   		return 0;
>>>   	return (arch == image_arch) ||
>>> -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
>>> +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
>>> +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);
>>
>> This should be a Kconfig option. ThunderX doesn't support AArch32
>> execution.
> [Alison Wang] I can't understand your meaning. Can you clarify it?

U-Boot supports the ThunderX SoC which uses Cavium's own ARMv8 core
implementation. That core does not support running AArch32 code.


Alex
Alison Wang May 20, 2016, 8:26 a.m. UTC | #4
> On 20.05.16 08:53, Huan Wang wrote:
> >> On 05/19/2016 10:26 AM, Alison Wang wrote:
> >>> To support loading a 32-bit OS, the execution state will change from
> >>> AArch64 to AArch32 when jumping to kernel.
> >>>
> >>> The architecture information will be got through checking FIT image,
> >>> then U-Boot will load 32-bit OS or 64-bit OS automatically.
> >>>
> >>> Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
> >>> Signed-off-by: Alison Wang <alison.wang@nxp.com>
> >>> Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
> >>> ---
> >>> Changes in v2:
> >>> - armv8_switch_to_el2_aarch32() is removed. armv8_switch_to_el2_m is
> >> used
> >>>    to switch to AArch64 EL2 or AArch32 Hyp.
> >>> - armv8_switch_to_el1_aarch32() is removed. armv8_switch_to_el1_m is
> >> used
> >>>    to switch to AArch64 EL1 or AArch32 SVC.
> >>>
> >>>   arch/arm/cpu/armv8/transition.S |  8 ++---
> >>>   arch/arm/include/asm/macro.h    | 80
> >> +++++++++++++++++++++++++++++++++++++----
> >>>   arch/arm/include/asm/system.h   | 25 +++++++++++--
> >>>   arch/arm/lib/bootm.c            | 26 +++++++++++---
> >>>   common/image-fit.c              | 12 ++++++-
> >>>   5 files changed, 133 insertions(+), 18 deletions(-)
> >>>
> >>> diff --git a/arch/arm/cpu/armv8/transition.S
> >>> b/arch/arm/cpu/armv8/transition.S index 253a39b..9fa3b59 100644
> >>> --- a/arch/arm/cpu/armv8/transition.S
> >>> +++ b/arch/arm/cpu/armv8/transition.S
> >>> @@ -11,13 +11,13 @@
> >>>   #include <asm/macro.h>
> >>>
> >>>   ENTRY(armv8_switch_to_el2)
> >>> -	switch_el x0, 1f, 0f, 0f
> >>> +	switch_el x4, 1f, 0f, 0f
> >>>   0:	ret
> >>> -1:	armv8_switch_to_el2_m x0
> >>> +1:	armv8_switch_to_el2_m x0, x1, x2, x3, x4, x5, x6
> >>>   ENDPROC(armv8_switch_to_el2)
> >>>
> >>>   ENTRY(armv8_switch_to_el1)
> >>> -	switch_el x0, 0f, 1f, 0f
> >>> +	switch_el x4, 0f, 1f, 0f
> >>>   0:	ret
> >>> -1:	armv8_switch_to_el1_m x0, x1
> >>> +1:	armv8_switch_to_el1_m x0, x1, x2, x3, x4, x5, x6
> >>>   ENDPROC(armv8_switch_to_el1)
> >>> diff --git a/arch/arm/include/asm/macro.h
> >>> b/arch/arm/include/asm/macro.h index 9bb0efa..56d77f6 100644
> >>> --- a/arch/arm/include/asm/macro.h
> >>> +++ b/arch/arm/include/asm/macro.h
> >>> @@ -8,6 +8,9 @@
> >>>
> >>>   #ifndef __ASM_ARM_MACRO_H__
> >>>   #define __ASM_ARM_MACRO_H__
> >>> +
> >>> +#include <asm/system.h>
> >>> +
> >>>   #ifdef __ASSEMBLY__
> >>>
> >>>   /*
> >>> @@ -135,9 +138,20 @@ lr	.req	x30
> >>>   #endif
> >>>   .endm
> >>>
> >>> -.macro armv8_switch_to_el2_m, xreg1
> >>> -	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1
> >> */
> >>> -	mov	\xreg1, #0x5b1
> >>> +.macro armv8_switch_to_el2_m, xreg1, xreg2, xreg3, xreg4, xreg5,
> >>> +xreg6, xreg7
> >>
> >> These arguments should probably get documented :). Also xreg4 doesn't
> >> seem to actually be a scratch register, but a real parameter. So
> >> please name it accordingly.
> > [Alison Wang] Ok, I will add the descriptions for these arguments. Not
> > only xreg4, xreg1, xreg2 and xreg3 are real parameters too. I will
> > rename them in the next version.
> >>
> >>> +	mov	\xreg5, \xreg1
> >>> +	mov	\xreg6, \xreg2
> >>> +	mov	\xreg7, \xreg3
> >>> +
> >>> +	/*
> >>> +	 * If the next lower exception level is AArch64, 64bit EL2 | HCE |
> >> SMD |
> >>> +	 * RES1 (Bits[5:4]) | Non-secure EL0/EL1. If the next lower
> >> exception
> >>> +	 * level is AArch32, 32bit EL2 | HCE | SMD | RES1 (Bits[5:4]) |
> >>> +	 * Non-secure EL0/EL1.
> >>> +	 */
> >>> +	mov	\xreg1, #0x1b1
> >>> +	lsl	\xreg2, \xreg4, #10
> >>> +	orr	\xreg1, \xreg1, \xreg2
> >>
> >> Is there any way you can make this obvious from the code? Basically
> >> we would want something like
> >>
> >> mov \xreg1, #(SCR_EL3_EL2_AA64 | SCR_EL3_HCE | SCR_EL3_SMD ...)
> >>
> >> I'm sure there are creative ways to achieve this even with the high
> >> bits set. Maybe ldr = works?
> > [Alison Wang] Yes, I will change them and make them obvious.
> >>
> >>>   	msr	scr_el3, \xreg1
> >>>   	msr	cptr_el3, xzr		/* Disable coprocessor traps
> to EL3
> >> */
> >>>   	mov	\xreg1, #0x33ff
> >>> @@ -156,18 +170,46 @@ lr	.req	x30
> >>>   	movk	\xreg1, #0x30C5, lsl #16
> >>>   	msr	sctlr_el2, \xreg1
> >>>
> >>> -	/* Return to the EL2_SP2 mode from EL3 */
> >>>   	mov	\xreg1, sp
> >>>   	msr	sp_el2, \xreg1		/* Migrate SP */
> >>>   	mrs	\xreg1, vbar_el3
> >>>   	msr	vbar_el2, \xreg1	/* Migrate VBAR */
> >>> +
> >>> +	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
> >>> +	ldr	\xreg1, =ES_TO_AARCH64
> >>> +	cmp	\xreg4, \xreg1
> >>> +	b.eq	1f
> >>> +	b.ne	2f
> >>
> >> Just b.ne should be enough, no?
> > [Alison Wang] Yes, you are right.
> >>
> >>> +
> >>> +1:
> >>> +	/* Return to the EL2_SP2 mode from EL3 */
> >>>   	mov	\xreg1, #0x3c9
> >>
> >> Magic values again :). Please convert them to constants getting ORed.
> > [Alison Wang] Ok, I will change the original magic values too.
> >>
> >>>   	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
> >>>   	msr	elr_el3, lr
> >>> +
> >>> +	mov	\xreg1, \xreg5
> >>> +	mov	\xreg2, \xreg6
> >>> +	mov	\xreg3, \xreg7
> >>> +	eret
> >>> +
> >>> +2:
> >>> +	/* Return to AArch32 Hypervisor mode */
> >>> +	mov     \xreg1, #0x1da
> >>> +	msr	spsr_el3, \xreg1	/* Hyp | D | A | I | F */
> >>> +	msr     elr_el3, \xreg5
> >>> +
> >>> +	mov	\xreg1, #0
> >>> +	mov	\xreg2, \xreg6
> >>> +	mov	\xreg3, \xreg7
> >>>   	eret
> >>> +
> >>>   .endm
> >>>
> >>> -.macro armv8_switch_to_el1_m, xreg1, xreg2
> >>> +.macro armv8_switch_to_el1_m, xreg1, xreg2, xreg3, xreg4, xreg5,
> >> xreg6, xreg7
> >>> +	mov	\xreg5, \xreg1
> >>> +	mov	\xreg6, \xreg2
> >>> +	mov	\xreg7, \xreg3
> >>> +
> >>>   	/* Initialize Generic Timers */
> >>>   	mrs	\xreg1, cnthctl_el2
> >>>   	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to
> timers */
> >>> @@ -188,7 +230,7 @@ lr	.req	x30
> >>>   	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
> >>>
> >>>   	/* Initialize HCR_EL2 */
> >>> -	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
> >>> +	lsl	\xreg1, \xreg4, #31
> >>
> >> This deserves a comment. Imagine you read this code again in 5 years.
> >> Would you still understand what's going on?
> > [Alison Wang] Ok, I will add a comment.
> >>
> >>>   	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
> >>>   	msr	hcr_el2, \xreg1
> >>>
> >>> @@ -203,15 +245,39 @@ lr	.req	x30
> >>>   	movk	\xreg1, #0x30d0, lsl #16
> >>>   	msr	sctlr_el1, \xreg1
> >>>
> >>> -	/* Return to the EL1_SP1 mode from EL2 */
> >>>   	mov	\xreg1, sp
> >>>   	msr	sp_el1, \xreg1		/* Migrate SP */
> >>>   	mrs	\xreg1, vbar_el2
> >>>   	msr	vbar_el1, \xreg1	/* Migrate VBAR */
> >>> +
> >>> +	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
> >>> +	ldr	\xreg1, =ES_TO_AARCH64
> >>> +	cmp	\xreg4, \xreg1
> >>> +	b.eq	1f
> >>> +	b.ne	2f
> >>> +
> >>> +1:
> >>> +	/* Return to the EL1_SP1 mode from EL2 */
> >>>   	mov	\xreg1, #0x3c5
> >>>   	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
> >>>   	msr	elr_el2, lr
> >>> +
> >>> +	mov	\xreg1, \xreg5
> >>> +	mov	\xreg2, \xreg6
> >>> +	mov	\xreg3, \xreg7
> >>> +	eret
> >>> +
> >>> +2:
> >>> +	/* Return to the Supervisor mode from EL2 */
> >>> +	mov     \xreg1, #0x1d3
> >>> +	msr     spsr_el2, \xreg1        /* Supervisor | D | A | I | F */
> >>> +	msr     elr_el2, \xreg5
> >>> +
> >>> +	mov	\xreg1, #0
> >>> +	mov	\xreg2, \xreg6
> >>> +	mov	\xreg3, \xreg7
> >>>   	eret
> >>> +
> >>>   .endm
> >>>
> >>>   #if defined(CONFIG_GICV3)
> >>> diff --git a/arch/arm/include/asm/system.h
> >>> b/arch/arm/include/asm/system.h index 9ae890a..fcfe3ae 100644
> >>> --- a/arch/arm/include/asm/system.h
> >>> +++ b/arch/arm/include/asm/system.h
> >>> @@ -17,6 +17,9 @@
> >>>   #define CR_WXN		(1 << 19)	/* Write Permision Imply XN
> 	*/
> >>>   #define CR_EE		(1 << 25)	/* Exception (Big) Endian
> 	*/
> >>>
> >>> +#define ES_TO_AARCH64			1
> >>> +#define ES_TO_AARCH32			0
> >>> +
> >>>   #ifndef __ASSEMBLY__
> >>>
> >>>   u64 get_page_table_size(void);
> >>> @@ -100,8 +103,26 @@ void __asm_invalidate_icache_all(void);
> >>>   int __asm_flush_l3_cache(void);
> >>>   void __asm_switch_ttbr(u64 new_ttbr);
> >>>
> >>> -void armv8_switch_to_el2(void);
> >>> -void armv8_switch_to_el1(void);
> >>> +/*
> >>> + * Switch from EL3 to EL2 for ARMv8
> >>> + *
> >>> + * @entry_point: kernel entry point
> >>> + * @mach_nr:     machine nr
> >>> + * @fdt_addr:    fdt address
> >>> + * @es_flag:     execution state flag, ES_TO_AARCH64 or
> ES_TO_AARCH32
> >>> + */
> >>> +void armv8_switch_to_el2(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> >>> +			 u64 es_flag);
> >>> +/*
> >>> + * Switch from EL2 to EL1 for ARMv8
> >>> + *
> >>> + * @entry_point: kernel entry point
> >>> + * @mach_nr:     machine nr
> >>> + * @fdt_addr:    fdt address
> >>> + * @es_flag:     execution state flag, ES_TO_AARCH64 or
> ES_TO_AARCH32
> >>> + */
> >>> +void armv8_switch_to_el1(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> >>> +			 u64 es_flag);
> >>>   void gic_init(void);
> >>>   void gic_send_sgi(unsigned long sgino);
> >>>   void wait_for_wakeup(void);
> >>> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c index
> >>> 0838d89..f6f685a 100644
> >>> --- a/arch/arm/lib/bootm.c
> >>> +++ b/arch/arm/lib/bootm.c
> >>> @@ -193,9 +193,9 @@ static void do_nonsec_virt_switch(void)
> >>>   {
> >>>   	smp_kick_all_cpus();
> >>>   	dcache_disable();	/* flush cache before swtiching to EL2 */
> >>> -	armv8_switch_to_el2();
> >>> +	armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> >>
> >> Just rename the existing function to armv8_switch_to_el2_adv() or so
> >> and provide armv8_switch_to_el2() as static inline function that
> >> simply invokes armv8_switch_to_el2_adv(0, 0, 0, ES_TO_AARCH64);
> > [Alison Wang] I didn't know why you suggest to do so. Please explain
> it.
> > Thanks.
> 
> I guess with my suggestion below this wouldn't make much sense anymore,
> so it's fine the way it is.
[Alison Wang] ok.
> 
> >>
> >>>   #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> >>> -	armv8_switch_to_el1();
> >>> +	armv8_switch_to_el1(0, 0, 0, ES_TO_AARCH64);
> >>>   #endif
> >>>   }
> >>>   #endif
> >>> @@ -286,8 +286,26 @@ static void boot_jump_linux(bootm_headers_t
> >> *images, int flag)
> >>>   	announce_and_cleanup(fake);
> >>>
> >>>   	if (!fake) {
> >>> -		do_nonsec_virt_switch();
> >>> -		kernel_entry(images->ft_addr, NULL, NULL, NULL);
> >>> +		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
> >>> +		    (images->os.arch == IH_ARCH_ARM)) {
> >>> +			smp_kick_all_cpus();
> >>> +			dcache_disable();
> >>
> >> This open codes part of do_nonsec_virt_switch().
> >>
> >> Could we just clean up all of that mess a bit more maybe? How about
> >> you call
> >>
> >> do_nonsec_virt_switch(ES_EL2 | ES_AARCH32, entry, images->ft_addr) or
> >> do_nonsec_virt_switch(ES_EL1 | ES_AARCH64, entry, images->ft_addr)
> >>
> >> Then the if() would only need to set ES_AARCH32/64 in a variable and
> >> everything else goes along the same code.
> > [Alison Wang] It sounds fine. I will try to do it in the next version.
> >>
> >>> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> >>> +			armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> >>> +			armv8_switch_to_el1((u64)images->ep,
> >>> +					    (u64)gd->bd->bi_arch_number,
> >>> +					    (u64)images->ft_addr,
> >>> +					    ES_TO_AARCH32);
> >>> +#else
> >>> +			armv8_switch_to_el2((u64)images->ep,
> >>> +					    (u64)gd->bd->bi_arch_number,
> >>> +					    (u64)images->ft_addr,
> >>> +					    ES_TO_AARCH32);
> >>> +#endif
> >>> +		} else {
> >>> +			do_nonsec_virt_switch();
> >>> +			kernel_entry(images->ft_addr, NULL, NULL, NULL);
> >>> +		}
> >>>   	}
> >>>   #else
> >>>   	unsigned long machid = gd->bd->bi_arch_number; diff --git
> >>> a/common/image-fit.c b/common/image-fit.c index 25f8a11..2986469
> >>> 100644
> >>> --- a/common/image-fit.c
> >>> +++ b/common/image-fit.c
> >>> @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit, int
> >> noffset, uint8_t arch)
> >>>   	if (fit_image_get_arch(fit, noffset, &image_arch))
> >>>   		return 0;
> >>>   	return (arch == image_arch) ||
> >>> -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
> >>> +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
> >>> +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);
> >>
> >> This should be a Kconfig option. ThunderX doesn't support AArch32
> >> execution.
> > [Alison Wang] I can't understand your meaning. Can you clarify it?
> 
> U-Boot supports the ThunderX SoC which uses Cavium's own ARMv8 core
> implementation. That core does not support running AArch32 code.
> 
[Alison Wang] Oh, I think your concern is that this function will not
detect FIT image is wrong when loading 32-bit kernel on ThunderX, right?

Then what is your suggestion to fix this issue?

Best Regards,
Alison Wang
Alexander Graf May 20, 2016, 8:29 a.m. UTC | #5
On 20.05.16 10:26, Huan Wang wrote:
>> On 20.05.16 08:53, Huan Wang wrote:
>>>> On 05/19/2016 10:26 AM, Alison Wang wrote:
>>>>> To support loading a 32-bit OS, the execution state will change from
>>>>> AArch64 to AArch32 when jumping to kernel.
>>>>>
>>>>> The architecture information will be got through checking FIT image,
>>>>> then U-Boot will load 32-bit OS or 64-bit OS automatically.
>>>>>
>>>>> Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
>>>>> Signed-off-by: Alison Wang <alison.wang@nxp.com>
>>>>> Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
>>>>> ---

[...]

>>>>> --- a/common/image-fit.c
>>>>> +++ b/common/image-fit.c
>>>>> @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit, int
>>>> noffset, uint8_t arch)
>>>>>   	if (fit_image_get_arch(fit, noffset, &image_arch))
>>>>>   		return 0;
>>>>>   	return (arch == image_arch) ||
>>>>> -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
>>>>> +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
>>>>> +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);
>>>>
>>>> This should be a Kconfig option. ThunderX doesn't support AArch32
>>>> execution.
>>> [Alison Wang] I can't understand your meaning. Can you clarify it?
>>
>> U-Boot supports the ThunderX SoC which uses Cavium's own ARMv8 core
>> implementation. That core does not support running AArch32 code.
>>
> [Alison Wang] Oh, I think your concern is that this function will not
> detect FIT image is wrong when loading 32-bit kernel on ThunderX, right?
> 
> Then what is your suggestion to fix this issue?

My suggestion is to introduce a new Kconfig option that says "this arm64
system can execute aarch32 code" which is set as default y.
CONFIG_THUNDERX would then set it to n if that's possible - or you'd
just set it to n in the thunderx defconfig.


Alex
Alison Wang May 20, 2016, 8:33 a.m. UTC | #6
> On 20.05.16 10:26, Huan Wang wrote:
> >> On 20.05.16 08:53, Huan Wang wrote:
> >>>> On 05/19/2016 10:26 AM, Alison Wang wrote:
> >>>>> To support loading a 32-bit OS, the execution state will change
> >>>>> from
> >>>>> AArch64 to AArch32 when jumping to kernel.
> >>>>>
> >>>>> The architecture information will be got through checking FIT
> >>>>> image, then U-Boot will load 32-bit OS or 64-bit OS automatically.
> >>>>>
> >>>>> Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
> >>>>> Signed-off-by: Alison Wang <alison.wang@nxp.com>
> >>>>> Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
> >>>>> ---
> 
> [...]
> 
> >>>>> --- a/common/image-fit.c
> >>>>> +++ b/common/image-fit.c
> >>>>> @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit,
> >>>>> int
> >>>> noffset, uint8_t arch)
> >>>>>   	if (fit_image_get_arch(fit, noffset, &image_arch))
> >>>>>   		return 0;
> >>>>>   	return (arch == image_arch) ||
> >>>>> -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
> >>>>> +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64)
> ||
> >>>>> +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);
> >>>>
> >>>> This should be a Kconfig option. ThunderX doesn't support AArch32
> >>>> execution.
> >>> [Alison Wang] I can't understand your meaning. Can you clarify it?
> >>
> >> U-Boot supports the ThunderX SoC which uses Cavium's own ARMv8 core
> >> implementation. That core does not support running AArch32 code.
> >>
> > [Alison Wang] Oh, I think your concern is that this function will not
> > detect FIT image is wrong when loading 32-bit kernel on ThunderX,
> right?
> >
> > Then what is your suggestion to fix this issue?
> 
> My suggestion is to introduce a new Kconfig option that says "this arm64
> system can execute aarch32 code" which is set as default y.
> CONFIG_THUNDERX would then set it to n if that's possible - or you'd
> just set it to n in the thunderx defconfig.
[Alison Wang] Good idea. I will do it in the next version. Thanks.

Best Regards,
Alison Wang
diff mbox

Patch

diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
index 253a39b..9fa3b59 100644
--- a/arch/arm/cpu/armv8/transition.S
+++ b/arch/arm/cpu/armv8/transition.S
@@ -11,13 +11,13 @@ 
 #include <asm/macro.h>
 
 ENTRY(armv8_switch_to_el2)
-	switch_el x0, 1f, 0f, 0f
+	switch_el x4, 1f, 0f, 0f
 0:	ret
-1:	armv8_switch_to_el2_m x0
+1:	armv8_switch_to_el2_m x0, x1, x2, x3, x4, x5, x6
 ENDPROC(armv8_switch_to_el2)
 
 ENTRY(armv8_switch_to_el1)
-	switch_el x0, 0f, 1f, 0f
+	switch_el x4, 0f, 1f, 0f
 0:	ret
-1:	armv8_switch_to_el1_m x0, x1
+1:	armv8_switch_to_el1_m x0, x1, x2, x3, x4, x5, x6
 ENDPROC(armv8_switch_to_el1)
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index 9bb0efa..56d77f6 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -8,6 +8,9 @@ 
 
 #ifndef __ASM_ARM_MACRO_H__
 #define __ASM_ARM_MACRO_H__
+
+#include <asm/system.h>
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -135,9 +138,20 @@  lr	.req	x30
 #endif
 .endm
 
-.macro armv8_switch_to_el2_m, xreg1
-	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1 */
-	mov	\xreg1, #0x5b1
+.macro armv8_switch_to_el2_m, xreg1, xreg2, xreg3, xreg4, xreg5, xreg6, xreg7
+	mov	\xreg5, \xreg1
+	mov	\xreg6, \xreg2
+	mov	\xreg7, \xreg3
+
+	/*
+	 * If the next lower exception level is AArch64, 64bit EL2 | HCE | SMD |
+	 * RES1 (Bits[5:4]) | Non-secure EL0/EL1. If the next lower exception
+	 * level is AArch32, 32bit EL2 | HCE | SMD | RES1 (Bits[5:4]) |
+	 * Non-secure EL0/EL1.
+	 */
+	mov	\xreg1, #0x1b1
+	lsl	\xreg2, \xreg4, #10
+	orr	\xreg1, \xreg1, \xreg2
 	msr	scr_el3, \xreg1
 	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
 	mov	\xreg1, #0x33ff
@@ -156,18 +170,46 @@  lr	.req	x30
 	movk	\xreg1, #0x30C5, lsl #16
 	msr	sctlr_el2, \xreg1
 
-	/* Return to the EL2_SP2 mode from EL3 */
 	mov	\xreg1, sp
 	msr	sp_el2, \xreg1		/* Migrate SP */
 	mrs	\xreg1, vbar_el3
 	msr	vbar_el2, \xreg1	/* Migrate VBAR */
+
+	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
+	ldr	\xreg1, =ES_TO_AARCH64
+	cmp	\xreg4, \xreg1
+	b.eq	1f
+	b.ne	2f
+
+1:
+	/* Return to the EL2_SP2 mode from EL3 */
 	mov	\xreg1, #0x3c9
 	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
 	msr	elr_el3, lr
+
+	mov	\xreg1, \xreg5
+	mov	\xreg2, \xreg6
+	mov	\xreg3, \xreg7
+	eret
+
+2:
+	/* Return to AArch32 Hypervisor mode */
+	mov     \xreg1, #0x1da
+	msr	spsr_el3, \xreg1	/* Hyp | D | A | I | F */
+	msr     elr_el3, \xreg5
+
+	mov	\xreg1, #0
+	mov	\xreg2, \xreg6
+	mov	\xreg3, \xreg7
 	eret
+
 .endm
 
-.macro armv8_switch_to_el1_m, xreg1, xreg2
+.macro armv8_switch_to_el1_m, xreg1, xreg2, xreg3, xreg4, xreg5, xreg6, xreg7
+	mov	\xreg5, \xreg1
+	mov	\xreg6, \xreg2
+	mov	\xreg7, \xreg3
+
 	/* Initialize Generic Timers */
 	mrs	\xreg1, cnthctl_el2
 	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
@@ -188,7 +230,7 @@  lr	.req	x30
 	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
 
 	/* Initialize HCR_EL2 */
-	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
+	lsl	\xreg1, \xreg4, #31
 	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
 	msr	hcr_el2, \xreg1
 
@@ -203,15 +245,39 @@  lr	.req	x30
 	movk	\xreg1, #0x30d0, lsl #16
 	msr	sctlr_el1, \xreg1
 
-	/* Return to the EL1_SP1 mode from EL2 */
 	mov	\xreg1, sp
 	msr	sp_el1, \xreg1		/* Migrate SP */
 	mrs	\xreg1, vbar_el2
 	msr	vbar_el1, \xreg1	/* Migrate VBAR */
+
+	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
+	ldr	\xreg1, =ES_TO_AARCH64
+	cmp	\xreg4, \xreg1
+	b.eq	1f
+	b.ne	2f
+
+1:
+	/* Return to the EL1_SP1 mode from EL2 */
 	mov	\xreg1, #0x3c5
 	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
 	msr	elr_el2, lr
+
+	mov	\xreg1, \xreg5
+	mov	\xreg2, \xreg6
+	mov	\xreg3, \xreg7
+	eret
+
+2:
+	/* Return to the Supervisor mode from EL2 */
+	mov     \xreg1, #0x1d3
+	msr     spsr_el2, \xreg1        /* Supervisor | D | A | I | F */
+	msr     elr_el2, \xreg5
+
+	mov	\xreg1, #0
+	mov	\xreg2, \xreg6
+	mov	\xreg3, \xreg7
 	eret
+
 .endm
 
 #if defined(CONFIG_GICV3)
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 9ae890a..fcfe3ae 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -17,6 +17,9 @@ 
 #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
 #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
 
+#define ES_TO_AARCH64			1
+#define ES_TO_AARCH32			0
+
 #ifndef __ASSEMBLY__
 
 u64 get_page_table_size(void);
@@ -100,8 +103,26 @@  void __asm_invalidate_icache_all(void);
 int __asm_flush_l3_cache(void);
 void __asm_switch_ttbr(u64 new_ttbr);
 
-void armv8_switch_to_el2(void);
-void armv8_switch_to_el1(void);
+/*
+ * Switch from EL3 to EL2 for ARMv8
+ *
+ * @entry_point: kernel entry point
+ * @mach_nr:     machine nr
+ * @fdt_addr:    fdt address
+ * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
+ */
+void armv8_switch_to_el2(u64 entry_point, u64 mach_nr, u64 fdt_addr,
+			 u64 es_flag);
+/*
+ * Switch from EL2 to EL1 for ARMv8
+ *
+ * @entry_point: kernel entry point
+ * @mach_nr:     machine nr
+ * @fdt_addr:    fdt address
+ * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
+ */
+void armv8_switch_to_el1(u64 entry_point, u64 mach_nr, u64 fdt_addr,
+			 u64 es_flag);
 void gic_init(void);
 void gic_send_sgi(unsigned long sgino);
 void wait_for_wakeup(void);
diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
index 0838d89..f6f685a 100644
--- a/arch/arm/lib/bootm.c
+++ b/arch/arm/lib/bootm.c
@@ -193,9 +193,9 @@  static void do_nonsec_virt_switch(void)
 {
 	smp_kick_all_cpus();
 	dcache_disable();	/* flush cache before swtiching to EL2 */
-	armv8_switch_to_el2();
+	armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
 #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
-	armv8_switch_to_el1();
+	armv8_switch_to_el1(0, 0, 0, ES_TO_AARCH64);
 #endif
 }
 #endif
@@ -286,8 +286,26 @@  static void boot_jump_linux(bootm_headers_t *images, int flag)
 	announce_and_cleanup(fake);
 
 	if (!fake) {
-		do_nonsec_virt_switch();
-		kernel_entry(images->ft_addr, NULL, NULL, NULL);
+		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
+		    (images->os.arch == IH_ARCH_ARM)) {
+			smp_kick_all_cpus();
+			dcache_disable();
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+			armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
+			armv8_switch_to_el1((u64)images->ep,
+					    (u64)gd->bd->bi_arch_number,
+					    (u64)images->ft_addr,
+					    ES_TO_AARCH32);
+#else
+			armv8_switch_to_el2((u64)images->ep,
+					    (u64)gd->bd->bi_arch_number,
+					    (u64)images->ft_addr,
+					    ES_TO_AARCH32);
+#endif
+		} else {
+			do_nonsec_virt_switch();
+			kernel_entry(images->ft_addr, NULL, NULL, NULL);
+		}
 	}
 #else
 	unsigned long machid = gd->bd->bi_arch_number;
diff --git a/common/image-fit.c b/common/image-fit.c
index 25f8a11..2986469 100644
--- a/common/image-fit.c
+++ b/common/image-fit.c
@@ -1163,7 +1163,8 @@  int fit_image_check_arch(const void *fit, int noffset, uint8_t arch)
 	if (fit_image_get_arch(fit, noffset, &image_arch))
 		return 0;
 	return (arch == image_arch) ||
-		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
+		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
+		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);
 }
 
 /**
@@ -1586,6 +1587,9 @@  int fit_image_load(bootm_headers_t *images, ulong addr,
 	int type_ok, os_ok;
 	ulong load, data, len;
 	uint8_t os;
+#ifndef USE_HOSTCC
+	uint8_t os_arch;
+#endif
 	const char *prop_name;
 	int ret;
 
@@ -1669,6 +1673,12 @@  int fit_image_load(bootm_headers_t *images, ulong addr,
 		return -ENOEXEC;
 	}
 #endif
+
+#ifndef USE_HOSTCC
+	fit_image_get_arch(fit, noffset, &os_arch);
+	images->os.arch = os_arch;
+#endif
+
 	if (image_type == IH_TYPE_FLATDT &&
 	    !fit_image_check_comp(fit, noffset, IH_COMP_NONE)) {
 		puts("FDT image is compressed");