[U-Boot,v13,1/6] core support of arm64

Message ID 1380202529-52241-2-git-send-email-fenghua@phytium.com.cn
State Superseded
Delegated to: Albert ARIBAUD

Commit Message

fenghua@phytium.com.cn Sept. 26, 2013, 1:35 p.m. UTC
From: David Feng <fenghua@phytium.com.cn>

Signed-off-by: David Feng <fenghua@phytium.com.cn>
---
 arch/arm/config.mk                      |    4 +
 arch/arm/cpu/armv8/Makefile             |   38 +++++
 arch/arm/cpu/armv8/cache.S              |  130 +++++++++++++++++
 arch/arm/cpu/armv8/cache_v8.c           |  218 ++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/config.mk            |   16 +++
 arch/arm/cpu/armv8/cpu.c                |   67 +++++++++
 arch/arm/cpu/armv8/exceptions.S         |  115 +++++++++++++++
 arch/arm/cpu/armv8/start.S              |  234 +++++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/timer.c              |   80 +++++++++++
 arch/arm/cpu/armv8/tlb.S                |   30 ++++
 arch/arm/cpu/armv8/u-boot.lds           |   71 ++++++++++
 arch/arm/include/asm/arch-armv8/gpio.h  |   11 ++
 arch/arm/include/asm/arch-armv8/mmu.h   |  110 +++++++++++++++
 arch/arm/include/asm/byteorder.h        |   12 ++
 arch/arm/include/asm/cache.h            |    5 +
 arch/arm/include/asm/config.h           |   10 ++
 arch/arm/include/asm/global_data.h      |    6 +-
 arch/arm/include/asm/io.h               |   15 +-
 arch/arm/include/asm/macro.h            |   39 ++++++
 arch/arm/include/asm/posix_types.h      |   10 ++
 arch/arm/include/asm/proc-armv/ptrace.h |   21 +++
 arch/arm/include/asm/proc-armv/system.h |   59 +++++++-
 arch/arm/include/asm/system.h           |   77 ++++++++++
 arch/arm/include/asm/types.h            |    4 +
 arch/arm/include/asm/u-boot.h           |    4 +
 arch/arm/include/asm/unaligned.h        |    2 +-
 arch/arm/lib/Makefile                   |   14 ++
 arch/arm/lib/board.c                    |   25 +++-
 arch/arm/lib/bootm.c                    |   16 +++
 arch/arm/lib/crt0_64.S                  |  116 +++++++++++++++
 arch/arm/lib/interrupts_64.c            |  120 ++++++++++++++++
 arch/arm/lib/relocate_64.S              |   57 ++++++++
 common/image.c                          |    1 +
 doc/README.arm64                        |   33 +++++
 examples/standalone/stubs.c             |   15 ++
 include/image.h                         |    1 +
 36 files changed, 1775 insertions(+), 11 deletions(-)
 create mode 100644 arch/arm/cpu/armv8/Makefile
 create mode 100644 arch/arm/cpu/armv8/cache.S
 create mode 100644 arch/arm/cpu/armv8/cache_v8.c
 create mode 100644 arch/arm/cpu/armv8/config.mk
 create mode 100644 arch/arm/cpu/armv8/cpu.c
 create mode 100644 arch/arm/cpu/armv8/exceptions.S
 create mode 100644 arch/arm/cpu/armv8/start.S
 create mode 100644 arch/arm/cpu/armv8/timer.c
 create mode 100644 arch/arm/cpu/armv8/tlb.S
 create mode 100644 arch/arm/cpu/armv8/u-boot.lds
 create mode 100644 arch/arm/include/asm/arch-armv8/gpio.h
 create mode 100644 arch/arm/include/asm/arch-armv8/mmu.h
 create mode 100644 arch/arm/lib/crt0_64.S
 create mode 100644 arch/arm/lib/interrupts_64.c
 create mode 100644 arch/arm/lib/relocate_64.S
 create mode 100644 doc/README.arm64

Comments

Rob Herring Oct. 3, 2013, 9:35 p.m. UTC | #1
On 09/26/2013 08:35 AM, fenghua@phytium.com.cn wrote:
> From: David Feng <fenghua@phytium.com.cn>
> 
> Signed-off-by: David Feng <fenghua@phytium.com.cn>
> ---
>  arch/arm/config.mk                      |    4 +

FYI, some recent mainline changes to config.mk break the build for me.

>  arch/arm/cpu/armv8/Makefile             |   38 +++++
>  arch/arm/cpu/armv8/cache.S              |  130 +++++++++++++++++
>  arch/arm/cpu/armv8/cache_v8.c           |  218 ++++++++++++++++++++++++++++
>  arch/arm/cpu/armv8/config.mk            |   16 +++
>  arch/arm/cpu/armv8/cpu.c                |   67 +++++++++
>  arch/arm/cpu/armv8/exceptions.S         |  115 +++++++++++++++
>  arch/arm/cpu/armv8/start.S              |  234 +++++++++++++++++++++++++++++++
>  arch/arm/cpu/armv8/timer.c              |   80 +++++++++++
>  arch/arm/cpu/armv8/tlb.S                |   30 ++++
>  arch/arm/cpu/armv8/u-boot.lds           |   71 ++++++++++
>  arch/arm/include/asm/arch-armv8/gpio.h  |   11 ++
>  arch/arm/include/asm/arch-armv8/mmu.h   |  110 +++++++++++++++
>  arch/arm/include/asm/byteorder.h        |   12 ++
>  arch/arm/include/asm/cache.h            |    5 +
>  arch/arm/include/asm/config.h           |   10 ++
>  arch/arm/include/asm/global_data.h      |    6 +-
>  arch/arm/include/asm/io.h               |   15 +-
>  arch/arm/include/asm/macro.h            |   39 ++++++
>  arch/arm/include/asm/posix_types.h      |   10 ++
>  arch/arm/include/asm/proc-armv/ptrace.h |   21 +++
>  arch/arm/include/asm/proc-armv/system.h |   59 +++++++-
>  arch/arm/include/asm/system.h           |   77 ++++++++++
>  arch/arm/include/asm/types.h            |    4 +
>  arch/arm/include/asm/u-boot.h           |    4 +
>  arch/arm/include/asm/unaligned.h        |    2 +-
>  arch/arm/lib/Makefile                   |   14 ++
>  arch/arm/lib/board.c                    |   25 +++-
>  arch/arm/lib/bootm.c                    |   16 +++
>  arch/arm/lib/crt0_64.S                  |  116 +++++++++++++++
>  arch/arm/lib/interrupts_64.c            |  120 ++++++++++++++++
>  arch/arm/lib/relocate_64.S              |   57 ++++++++
>  common/image.c                          |    1 +
>  doc/README.arm64                        |   33 +++++
>  examples/standalone/stubs.c             |   15 ++
>  include/image.h                         |    1 +
>  36 files changed, 1775 insertions(+), 11 deletions(-)
>  create mode 100644 arch/arm/cpu/armv8/Makefile
>  create mode 100644 arch/arm/cpu/armv8/cache.S
>  create mode 100644 arch/arm/cpu/armv8/cache_v8.c
>  create mode 100644 arch/arm/cpu/armv8/config.mk
>  create mode 100644 arch/arm/cpu/armv8/cpu.c
>  create mode 100644 arch/arm/cpu/armv8/exceptions.S
>  create mode 100644 arch/arm/cpu/armv8/start.S
>  create mode 100644 arch/arm/cpu/armv8/timer.c
>  create mode 100644 arch/arm/cpu/armv8/tlb.S
>  create mode 100644 arch/arm/cpu/armv8/u-boot.lds
>  create mode 100644 arch/arm/include/asm/arch-armv8/gpio.h
>  create mode 100644 arch/arm/include/asm/arch-armv8/mmu.h
>  create mode 100644 arch/arm/lib/crt0_64.S
>  create mode 100644 arch/arm/lib/interrupts_64.c
>  create mode 100644 arch/arm/lib/relocate_64.S
>  create mode 100644 doc/README.arm64
> 
> diff --git a/arch/arm/config.mk b/arch/arm/config.mk
> index ce3903b..95c07ad 100644
> --- a/arch/arm/config.mk
> +++ b/arch/arm/config.mk
> @@ -74,7 +74,9 @@ endif
>  endif
>  
>  # needed for relocation
> +ifndef CONFIG_ARM64
>  LDFLAGS_u-boot += -pie
> +endif
>  
>  #
>  # FIXME: binutils versions < 2.22 have a bug in the assembler where
> @@ -95,6 +97,8 @@ endif
>  endif
>  
>  # check that only R_ARM_RELATIVE relocations are generated
> +ifndef CONFIG_ARM64
>  ifneq ($(CONFIG_SPL_BUILD),y)
>  ALL-y	+= checkarmreloc
>  endif
> +endif
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> new file mode 100644
> index 0000000..b216f27
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -0,0 +1,38 @@
> +#
> +# (C) Copyright 2000-2003
> +# Wolfgang Denk, DENX Software Engineering, wd@denx.de.
> +#
> +# SPDX-License-Identifier:	GPL-2.0+
> +#
> +
> +include $(TOPDIR)/config.mk
> +
> +LIB	= $(obj)lib$(CPU).o
> +
> +START	:= start.o
> +
> +COBJS	+= cpu.o
> +COBJS	+= timer.o
> +COBJS	+= cache_v8.o
> +
> +SOBJS	+= exceptions.o
> +SOBJS	+= cache.o
> +SOBJS	+= tlb.o
> +
> +SRCS	:= $(START:.o=.S) $(COBJS:.o=.c)
> +OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
> +START	:= $(addprefix $(obj),$(START))
> +
> +all:	$(obj).depend $(START) $(LIB)
> +
> +$(LIB):	$(OBJS)
> +	$(call cmd_link_o_target, $(OBJS))
> +
> +#########################################################################
> +
> +# defines $(obj).depend target
> +include $(SRCTREE)/rules.mk
> +
> +sinclude $(obj).depend
> +
> +#########################################################################
> diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
> new file mode 100644
> index 0000000..419f169
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cache.S
> @@ -0,0 +1,130 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * void __asm_flush_dcache_level(level)
> + *
> + * clean and invalidate one level of cache.
> + *
> + * x0: cache level
> + * x1~x9: clobbered
> + */
> +ENTRY(__asm_flush_dcache_level)
> +	lsl	x1, x0, #1
> +	msr	csselr_el1, x1		/* select cache level */
> +	isb				/* isb to sync the new csselr & ccsidr */
> +	mrs	x6, ccsidr_el1		/* read the new ccsidr */
> +	and	x2, x6, #7		/* x2 <- length of the cache lines */
> +	add	x2, x2, #4		/* add 4 (line length offset) */
> +	mov	x3, #0x3ff
> +	and	x3, x3, x6, lsr #3	/* x3 <- maximum number of ways */
> +	clz	w5, w3			/* bit position of way size */
> +	mov	x4, #0x7fff
> +	and	x4, x4, x6, lsr #13	/* x4 <- maximum number of sets */
> +	/* x1 <- cache level << 1 */
> +	/* x2 <- line length offset */
> +	/* x3 <- number of cache ways */
> +	/* x4 <- number of cache sets */
> +	/* x5 <- bit position of way size */
> +
> +loop_set:
> +	mov	x6, x3			/* create working copy of way size */
> +loop_way:
> +	lsl	x7, x6, x5
> +	orr	x9, x0, x7		/* map way and level to cisw value */
> +	lsl	x7, x4, x2
> +	orr	x9, x9, x7		/* map set number to cisw value */
> +	dc	cisw, x9		/* clean & invalidate by set/way */
> +	subs	x6, x6, #1		/* decrement the way */
> +	b.ge	loop_way
> +	subs	x4, x4, #1		/* decrement the set */
> +	b.ge	loop_set
> +
> +	ret
> +ENDPROC(__asm_flush_dcache_level)
> +
> +/*
> + * void __asm_flush_dcache_all(void)
> + *
> + * clean and invalidate all data cache by SET/WAY.
> + */
> +ENTRY(__asm_flush_dcache_all)
> +	dsb	sy
> +	mov	x15, lr
> +	mrs	x10, clidr_el1		/* read clidr */
> +	lsr	x11, x10, #24
> +	and	x11, x11, #0x7		/* x11 <- loc */
> +	cbz	x11, finished		/* if loc is 0, no need to clean */
> +	mov	x0, #0			/* start flush at cache level 0 */
> +	/* x0  <- cache level */
> +	/* x10 <- clidr_el1 */
> +	/* x11 <- loc */
> +
> +loop_level:
> +	lsl	x1, x0, #1
> +	add	x1, x1, x0		/* x1 <- 3x cache level */
> +	lsr	x1, x10, x1
> +	and	x1, x1, #7		/* x1 <- cache type */
> +	cmp	x1, #2
> +	b.lt	skip			/* skip if no cache or icache */
> +	bl	__asm_flush_dcache_level
> +skip:
> +	add	x0, x0, #1		/* increment cache level */
> +	cmp	x11, x0
> +	b.gt	loop_level
> +
> +finished:
> +	mov	x0, #0
> +	msr	csselr_el1, x0		/* switch back to cache level 0 */
> +	dsb	sy
> +	isb
> +	mov	lr, x15
> +	ret
> +ENDPROC(__asm_flush_dcache_all)
> +
> +/*
> + * void __asm_flush_dcache_range(start, end)
> + *
> + * clean & invalidate data cache in the range
> + *
> + * x0: start address
> + * x1: end address
> + */
> +ENTRY(__asm_flush_dcache_range)
> +	mrs	x3, ctr_el0		/* read CTR */
> +	lsr	x3, x3, #16
> +	and	x3, x3, #0xf		/* cache line size encoding */
> +	mov	x2, #4			/* bytes per word */
> +	lsl	x2, x2, x3		/* actual cache line size */
> +
> +	/* x2 <- minimal cache line size in cache system */
> +	sub	x3, x2, #1
> +	bic	x0, x0, x3
> +1:      dc	civac, x0		/* clean & invalidate D/unified line */
> +	add	x0, x0, x2
> +	cmp	x0, x1
> +	b.lo	1b
> +	dsb	sy
> +	ret
> +ENDPROC(__asm_flush_dcache_range)
> +
> +/*
> + * void __asm_invalidate_icache_all(void)
> + *
> + * invalidate the entire instruction cache.
> + */
> +ENTRY(__asm_invalidate_icache_all)
> +	ic	ialluis
> +	isb	sy
> +	ret
> +ENDPROC(__asm_invalidate_icache_all)
> diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
> new file mode 100644
> index 0000000..34426fd
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cache_v8.c
> @@ -0,0 +1,218 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <asm/system.h>
> +#include <asm/arch/mmu.h>
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +#ifndef CONFIG_SYS_DCACHE_OFF
> +
> +static void set_pgtable_section(u64 section, u64 memory_type)
> +{
> +	u64 *page_table = (u64 *)gd->arch.tlb_addr;
> +	u64 value;
> +
> +	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
> +	value |= PMD_ATTRINDX(memory_type);
> +	page_table[section] = value;
> +}
> +
> +/* to activate the MMU we need to set up virtual memory */
> +static void mmu_setup(void)
> +{
> +	int i, j, el;
> +	bd_t *bd = gd->bd;
> +
> +	/* Setup an identity-mapping for all spaces */
> +	for (i = 0; i < (PAGE_SIZE >> 3); i++)
> +		set_pgtable_section(i, MT_DEVICE_NGNRNE);
> +
> +	/* Setup an identity-mapping for all RAM space */
> +	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
> +		ulong start = bd->bi_dram[i].start;
> +		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
> +		for (j = start >> SECTION_SHIFT;
> +		     j < end >> SECTION_SHIFT; j++) {
> +			set_pgtable_section(j, MT_NORMAL);
> +		}
> +	}
> +
> +	/* load TTBR0 */
> +	el = curent_el();
> +	if (el == 1)
> +		asm volatile("msr ttbr0_el1, %0"
> +			     : : "r" (gd->arch.tlb_addr) : "memory");
> +	else if (el == 2)
> +		asm volatile("msr ttbr0_el2, %0"
> +			     : : "r" (gd->arch.tlb_addr) : "memory");
> +	else
> +		panic("Not Supported Exception Level");
> +
> +	/* enable the mmu */
> +	set_sctlr(get_sctlr() | CR_M);
> +}
> +
> +/*
> + * Performs an invalidation of the entire data cache at all levels
> + */
> +void invalidate_dcache_all(void)
> +{
> +	__asm_flush_dcache_all();
> +}
> +
> +/*
> + * Performs a clean & invalidation of the entire data cache at all levels
> + */
> +void flush_dcache_all(void)
> +{
> +	__asm_flush_dcache_all();
> +}
> +
> +/*
> + * Invalidates range in all levels of D-cache/unified cache
> + */
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	__asm_flush_dcache_range(start, stop);
> +}
> +
> +/*
> + * Flush range(clean & invalidate) from all levels of D-cache/unified cache
> + */
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	__asm_flush_dcache_range(start, stop);
> +}
> +
> +void dcache_enable(void)
> +{
> +	/* The data cache is not active unless the mmu is enabled */
> +	if (!(get_sctlr() & CR_M)) {
> +		invalidate_dcache_all();
> +		__asm_invalidate_tlb_all();
> +		mmu_setup();
> +	}
> +
> +	set_sctlr(get_sctlr() | CR_C);
> +}
> +
> +void dcache_disable(void)
> +{
> +	uint32_t sctlr;
> +
> +	sctlr = get_sctlr();
> +
> +	/* if cache isn't enabled no need to disable */
> +	if (!(sctlr & CR_C))
> +		return;
> +
> +	set_sctlr(sctlr & ~(CR_C|CR_M));
> +
> +	flush_dcache_all();
> +	__asm_invalidate_tlb_all();
> +}
> +
> +int dcache_status(void)
> +{
> +	return (get_sctlr() & CR_C) != 0;
> +}
> +
> +#else	/* CONFIG_SYS_DCACHE_OFF */
> +
> +void invalidate_dcache_all(void)
> +{
> +}
> +
> +void flush_dcache_all(void)
> +{
> +}
> +
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void dcache_enable(void)
> +{
> +}
> +
> +void dcache_disable(void)
> +{
> +}
> +
> +int dcache_status(void)
> +{
> +	return 0;
> +}
> +
> +#endif	/* CONFIG_SYS_DCACHE_OFF */
> +
> +#ifndef CONFIG_SYS_ICACHE_OFF
> +
> +void icache_enable(void)
> +{
> +	set_sctlr(get_sctlr() | CR_I);
> +}
> +
> +void icache_disable(void)
> +{
> +	set_sctlr(get_sctlr() & ~CR_I);
> +}
> +
> +int icache_status(void)
> +{
> +	return (get_sctlr() & CR_I) != 0;
> +}
> +
> +void invalidate_icache_all(void)
> +{
> +	__asm_invalidate_icache_all();
> +}
> +
> +#else	/* CONFIG_SYS_ICACHE_OFF */
> +
> +void icache_enable(void)
> +{
> +}
> +
> +void icache_disable(void)
> +{
> +}
> +
> +int icache_status(void)
> +{
> +	return 0;
> +}
> +
> +void invalidate_icache_all(void)
> +{
> +}
> +
> +#endif	/* CONFIG_SYS_ICACHE_OFF */
> +
> +/*
> + * Enable dCache & iCache; whether the caches are actually enabled
> + * depends on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
> + */
> +void enable_caches(void)
> +{
> +	icache_enable();
> +	dcache_enable();
> +}
> +
> +/*
> + * Flush range from all levels of d-cache/unified-cache
> + */
> +void flush_cache(unsigned long start, unsigned long size)
> +{
> +	flush_dcache_range(start, start + size);
> +}
> diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
> new file mode 100644
> index 0000000..9f36d59
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/config.mk
> @@ -0,0 +1,16 @@
> +#
> +# (C) Copyright 2002
> +# Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
> +#
> +# SPDX-License-Identifier:	GPL-2.0+
> +#
> +PLATFORM_RELFLAGS += -fno-common -ffixed-x18
> +
> +# SEE README.arm-unaligned-accesses
> +PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
> +PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
> +
> +PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
> +PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
> +PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
> +PLATFORM_CPPFLAGS += -fpic
> diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
> new file mode 100644
> index 0000000..83e73ab
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cpu.c
> @@ -0,0 +1,67 @@
> +/*
> + * (C) Copyright 2008 Texas Instruments
> + *
> + * (C) Copyright 2002
> + * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
> + * Marius Groeger <mgroeger@sysgo.de>
> + *
> + * (C) Copyright 2002
> + * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +/*
> + * CPU specific code
> + */
> +
> +#include <common.h>
> +#include <command.h>
> +#include <asm/system.h>
> +#include <linux/compiler.h>
> +
> +void __weak cpu_cache_initialization(void){}
> +
> +int cleanup_before_linux(void)
> +{
> +	/*
> +	 * this function is called just before we call linux
> +	 * it prepares the processor for linux
> +	 *
> +	 * we turn off caches etc ...
> +	 */
> +#ifndef CONFIG_SPL_BUILD
> +	disable_interrupts();
> +#endif
> +
> +	/*
> +	 * Turn off I-cache and invalidate it
> +	 */
> +	icache_disable();
> +	invalidate_icache_all();
> +
> +	/*
> +	 * turn off D-cache
> +	 * dcache_disable() in turn flushes the d-cache and disables MMU
> +	 */
> +	dcache_disable();
> +
> +	/*
> +	 * After D-cache is flushed and before it is disabled there may
> +	 * be some new valid entries brought into the cache. We are sure
> +	 * that these lines are not dirty and will not affect our execution.
> +	 * (because unwinding the call-stack and setting a bit in SCTLR_ELx
> +	 * is all we did during this. We have not pushed anything on to the
> +	 * stack. Neither have we affected any static data)
> +	 * So just invalidate the entire d-cache again to avoid coherency
> +	 * problems for kernel
> +	 */
> +	invalidate_dcache_all();
> +
> +	/*
> +	 * Some CPUs need more cache attention before starting the kernel.
> +	 */
> +	cpu_cache_initialization();
> +
> +	return 0;
> +}
> diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
> new file mode 100644
> index 0000000..b2f62c9
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/exceptions.S
> @@ -0,0 +1,115 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <asm/ptrace.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * Enter Exception.
> + * This will save the processor state, that is ELR and X0-X30,
> + * to the stack frame.
> + */
> +.macro	exception_entry
> +	stp	x29, x30, [sp, #-16]!
> +	stp	x27, x28, [sp, #-16]!
> +	stp	x25, x26, [sp, #-16]!
> +	stp	x23, x24, [sp, #-16]!
> +	stp	x21, x22, [sp, #-16]!
> +	stp	x19, x20, [sp, #-16]!
> +	stp	x17, x18, [sp, #-16]!
> +	stp	x15, x16, [sp, #-16]!
> +	stp	x13, x14, [sp, #-16]!
> +	stp	x11, x12, [sp, #-16]!
> +	stp	x9, x10, [sp, #-16]!
> +	stp	x7, x8, [sp, #-16]!
> +	stp	x5, x6, [sp, #-16]!
> +	stp	x3, x4, [sp, #-16]!
> +	stp	x1, x2, [sp, #-16]!
> +
> +	/* Could be running at EL1 or EL2 */
> +	mrs	x11, CurrentEL
> +	cmp	x11, 0x4
> +	b.eq	1f
> +	cmp	x11, 0x8
> +	b.eq	2f
> +	b	3f
> +1:	mrs	x1, esr_el1
> +	mrs	x2, elr_el1
> +	b	3f
> +2:	mrs	x1, esr_el2
> +	mrs	x2, elr_el2
> +3:
> +	stp	x2, x0, [sp, #-16]!
> +	mov	x0, sp
> +.endm
> +
> +/*
> + * Exception vectors.
> + */
> +	.align	11
> +	.globl	vectors
> +vectors:
> +	.align	7
> +	b	_do_bad_sync	/* Current EL Synchronous Thread */
> +
> +	.align	7
> +	b	_do_bad_irq	/* Current EL IRQ Thread */
> +
> +	.align	7
> +	b	_do_bad_fiq	/* Current EL FIQ Thread */
> +
> +	.align	7
> +	b	_do_bad_error	/* Current EL Error Thread */
> +
> +	.align	7
> +	b	_do_sync	/* Current EL Synchronous Handler */
> +
> +	.align	7
> +	b	_do_irq		/* Current EL IRQ Handler */
> +
> +	.align	7
> +	b	_do_fiq		/* Current EL FIQ Handler */
> +
> +	.align	7
> +	b	_do_error	/* Current EL Error Handler */
> +
> +
> +_do_bad_sync:
> +	exception_entry
> +	bl	do_bad_sync
> +
> +_do_bad_irq:
> +	exception_entry
> +	bl	do_bad_irq
> +
> +_do_bad_fiq:
> +	exception_entry
> +	bl	do_bad_fiq
> +
> +_do_bad_error:
> +	exception_entry
> +	bl	do_bad_error
> +
> +_do_sync:
> +	exception_entry
> +	bl	do_sync
> +
> +_do_irq:
> +	exception_entry
> +	bl	do_irq
> +
> +_do_fiq:
> +	exception_entry
> +	bl	do_fiq
> +
> +_do_error:
> +	exception_entry
> +	bl	do_error
> diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
> new file mode 100644
> index 0000000..28c8fe3
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/start.S
> @@ -0,0 +1,234 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +#include <asm/arch/mmu.h>
> +
> +/*************************************************************************
> + *
> + * Startup Code (reset vector)
> + *
> + *************************************************************************/
> +
> +.globl	_start
> +_start:
> +	b	reset
> +
> +	.align 3
> +
> +.globl	_TEXT_BASE
> +_TEXT_BASE:
> +	.quad	CONFIG_SYS_TEXT_BASE
> +
> +/*
> + * These are defined in the linker script.
> + */
> +.globl	_end_ofs
> +_end_ofs:
> +	.quad	_end - _start
> +
> +.globl	_bss_start_ofs
> +_bss_start_ofs:
> +	.quad	__bss_start - _start
> +
> +.globl	_bss_end_ofs
> +_bss_end_ofs:
> +	.quad	__bss_end - _start
> +
> +reset:
> +	/*
> +	 * Could be EL3/EL2/EL1
> +	 */
> +	mrs	x0, CurrentEL
> +	cmp	x0, #0xc
> +	b.ne	reset_nonsecure			/* Not EL3 */
> +
> +	bl	setup_el3			/* EL3 initialization */
> +
> +	/*
> +	 * MMU Disabled, iCache Disabled, dCache Disabled
> +	 */
> +reset_nonsecure:
> +
> +#ifdef CONFIG_BOOTING_EL1
> +	switch_el1_el2 x0, 1f, 2f, 3f
> +1:	b	3f
> +2:	bl	setup_el2			/* EL2 initialization */
> +3:
> +#endif
> +
> +	/* Initialize vBAR/CPACR_EL1/MDSCR_EL1 */
> +	adr	x0, vectors
> +	switch_el1_el2 x1, 1f, 2f, 3f
> +1:	msr	vbar_el1, x0
> +	mov	x0, #3 << 20
> +	msr	cpacr_el1, x0			/* Enable FP/SIMD */
> +	msr	mdscr_el1, xzr
> +	b	3f
> +2:	msr	vbar_el2, x0
> +3:
> +
> +	/* Cache/BPB/TLB Invalidate */
> +	bl	__asm_flush_dcache_all		/* dCache clean & invalidate */
> +	bl	__asm_invalidate_icache_all	/* iCache invalidate */
> +	bl	__asm_invalidate_tlb_all	/* invalidate TLBs */
> +
> +	/* Processor specific initialization */
> +	bl	lowlevel_init
> +
> +	branch_if_slave	x0, slave_cpu
> +
> +	/*
> +	 * Master CPU
> +	 */
> +master_cpu:
> +	bl	_main
> +
> +	/*
> +	 * Slave CPUs
> +	 */
> +slave_cpu:
> +	wfe
> +	ldr	x1, =SECONDARY_CPU_MAILBOX

This is platform specific. Not all platforms will boot secondary cores
into u-boot.

You could simply ifdef this code with SECONDARY_CPU_MAILBOX.
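
A minimal sketch of what that guard might look like (illustrative only;
SECONDARY_CPU_MAILBOX and the parking behaviour are whatever the platform
actually provides):

slave_cpu:
	wfe
#ifdef SECONDARY_CPU_MAILBOX
	ldr	x1, =SECONDARY_CPU_MAILBOX	/* platform-defined mailbox */
	ldr	x0, [x1]
	cbz	x0, slave_cpu			/* keep waiting until non-zero */
	br	x0				/* branch to the given address */
#else
	b	slave_cpu			/* no mailbox: just park the core */
#endif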

> +	ldr	x0, [x1]
> +	cbz	x0, slave_cpu
> +	br	x0			/* branch to the given address */
> +
> +/*-------------------------------------------------------------------------*/
> +
> +WEAK(setup_el3)

This function should be optional so platforms don't have to define
GIC_DIST_BASE, GIC_CPU_BASE, and CONFIG_SYS_CNTFRQ.
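
One way to do that while keeping the function weak is to guard the
platform-specific pieces, roughly like this (sketch only; the GIC block
is the same code as in the patch below, just made conditional):

WEAK(setup_el3)
	mov	x0, #0x531	/* Non-secure EL0/EL1 | HVC | 64bit EL2 */
	msr	scr_el3, x0
	msr	cptr_el3, xzr	/* Disable coprocessor traps to EL3 */

#if defined(GIC_DIST_BASE) && defined(GIC_CPU_BASE)
	/* GICD/GICC initialization exactly as in the patch below */
#endif

#ifdef CONFIG_SYS_CNTFRQ
	ldr	x0, =CONFIG_SYS_CNTFRQ
	msr	cntfrq_el0, x0	/* only for boards that define it */
#endif

	msr	sctlr_el2, xzr
	mov	x0, #0x3c9	/* EL2_SP2 | D | A | I | F */
	msr	elr_el3, lr
	msr	spsr_el3, x0
	eret
ENDPROC(setup_el3)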

> +	mov	x0, #0x531	/* Non-secure EL0/EL1 | HVC | 64bit EL2 */
> +	msr	scr_el3, x0
> +	msr	cptr_el3, xzr	/* Disable coprocessor traps to EL3 */
> +
> +	/* GIC initialization */
> +	branch_if_slave	x0, 2f
> +
> +	/* Master initialize distributor */
> +	ldr	x1, =GIC_DIST_BASE	/* GICD_CTLR */
> +	mov	w0, #0x3		/* Enable Group0 & Group1 */
> +	str	w0, [x1]
> +	ldr	w0, [x1, #0x4]		/* GICD_TYPER */
> +	and	w2, w0, #0x1f		/* ITLinesNumber */
> +	add	w2, w2, #0x1		/* Number of GICD_IGROUPR registers */
> +	add	x1, x1, #0x80		/* GICD_IGROUPR */
> +	mov	w0, #~0			/* All Group1 */
> +1:	str	w0, [x1], #0x4
> +	sub	w2, w2, #0x1
> +	cbnz	w2, 1b
> +	b	3f
> +
> +	/* Slave initialize distributor */
> +2:	ldr	x1, =GIC_DIST_BASE	/* GICD_CTLR */
> +	mov	w0, #~0			/* All Group1 */
> +	str	w0, [x1, #0x80]
> +
> +	/* Initialize cpu interface */
> +3:	ldr	x1, =GIC_CPU_BASE	/* GICC_CTLR */
> +	mov	w0, #0x3		/* Enable Group0 & Group1 */
> +	str	w0, [x1]
> +
> +	mov	w0, #0x1 << 7		/* Non-Secure access to GICC_PMR */
> +	str	w0, [x1, #0x4]		/* GICC_PMR */
> +
> +	/* Counter frequency initialization */
> +	ldr	x0, =CONFIG_SYS_CNTFRQ
> +	msr	cntfrq_el0, x0
> +
> +	/* SCTLR_EL2 initialization */
> +	msr	sctlr_el2, xzr
> +
> +	/* Return to the EL2_SP2 mode from EL3 */
> +	mov	x0, #0x3c9		/* EL2_SP2 | D | A | I | F */
> +	msr	elr_el3, lr
> +	msr	spsr_el3, x0
> +	eret
> +ENDPROC(setup_el3)
> +
> +WEAK(setup_el2)
> +	/* Initialize Generic Timers */
> +	mrs	x0, cnthctl_el2
> +	orr	x0, x0, #0x3		/* Enable EL1 access to timers */
> +	msr	cnthctl_el2, x0
> +	msr	cntvoff_el2, xzr	/* Clear virtual offset */
> +	mrs	x0, cntkctl_el1
> +	orr	x0, x0, #0x3		/* EL0 access to counters */
> +	msr	cntkctl_el1, x0
> +
> +	/* Initialize VPIDR_EL2/VMPIDR_EL2 registers */
> +	mrs	x0, midr_el1
> +	mrs	x1, mpidr_el1
> +	msr	vpidr_el2, x0
> +	msr	vmpidr_el2, x1
> +
> +	/* Disable coprocessor traps */
> +	mov	x0, #0x33ff
> +	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
> +	msr	hstr_el2, xzr		/* Disable CP15 traps to EL2 */
> +
> +	/* Initialize HCR_EL2 */
> +	mov	x0, #(1 << 31)		/* 64bit EL1 */
> +	orr	x0, x0, #(1 << 29)	/* Disable HVC */
> +	msr	hcr_el2, x0
> +
> +	/* SCTLR_EL1 initialization */
> +	mov	x0, #0x0800
> +	movk	x0, #0x30d0, lsl #16
> +	msr	sctlr_el1, x0
> +
> +	/* Return to the EL1_SP1 mode from EL2 */
> +	mov	x0, #0x3c5		/* EL1_SP1 | D | A | I | F */
> +	msr	elr_el2, lr
> +	msr	spsr_el2, x0
> +	eret
> +ENDPROC(setup_el2)
> +
> +WEAK(lowlevel_init)
> +	ret
> +ENDPROC(lowlevel_init)
> +
> +/*-------------------------------------------------------------------------*/
> +
> +ENTRY(c_runtime_cpu_setup)
> +	/* If I-cache is enabled invalidate it */
> +#ifndef CONFIG_SYS_ICACHE_OFF
> +	ic	iallu			/* I+BTB cache invalidate */
> +	isb	sy
> +#endif
> +
> +#ifndef CONFIG_SYS_DCACHE_OFF
> +	/*
> +	 * Setup MAIR and TCR. Using 512GB address range.
> +	 */
> +	ldr	x0, =MEMORY_ATTRIBUTES
> +	ldr	x1, =TCR_FLAGS
> +
> +	switch_el1_el2 x2, 1f, 2f, 3f
> +1:	orr	x1, x1, TCR_EL1_IPS_40BIT
> +	msr	mair_el1, x0
> +	msr	tcr_el1, x1
> +	b	3f
> +2:	orr	x1, x1, TCR_EL2_IPS_40BIT
> +	msr	mair_el2, x0
> +	msr	tcr_el2, x1
> +3:
> +#endif
> +
> +	/* Relocate vBAR */
> +	adr	x0, vectors
> +	switch_el1_el2 x1, 1f, 2f, 3f
> +1:	msr	vbar_el1, x0
> +	b	3f
> +2:	msr	vbar_el2, x0
> +3:
> +
> +	ret
> +ENDPROC(c_runtime_cpu_setup)
> diff --git a/arch/arm/cpu/armv8/timer.c b/arch/arm/cpu/armv8/timer.c
> new file mode 100644
> index 0000000..9605e84
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/timer.c
> @@ -0,0 +1,80 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <div64.h>
> +#include <linux/compiler.h>
> +
> +/*
> + * Generic Timer implementation of __udelay/get_timer/get_ticks/get_tbclk
> + * functions. If any other timer is used, another implementation should be
> + * placed in platform code.
> + */
> +
> +static inline u64 get_cntfrq(void)
> +{
> +	u64 cntfrq;
> +	asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
> +	return cntfrq;
> +}
> +
> +static inline u64 tick_to_time(u64 tick)
> +{
> +	tick *= CONFIG_SYS_HZ;
> +	do_div(tick, get_cntfrq());
> +	return tick;
> +}
> +
> +static inline u64 time_to_tick(u64 time)
> +{
> +	time *= get_cntfrq();
> +	do_div(time, CONFIG_SYS_HZ);
> +	return time;
> +}
> +
> +/*
> + * Generic timer implementation of get_tbclk()
> + */
> +ulong __weak get_tbclk(void)
> +{
> +	return CONFIG_SYS_HZ;

You should return get_cntfrq() here.
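
I.e. something like (sketch):

ulong __weak get_tbclk(void)
{
	return get_cntfrq();
}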

> +}
> +
> +/*
> + * Generic timer implementation of get_timer()
> + */
> +ulong __weak get_timer(ulong base)
> +{
> +	u64 cval;
> +
> +	isb();
> +	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
> +
> +	return tick_to_time(cval) - base;
> +}
> +
> +/*
> + * Generic timer implementation of get_ticks()
> + */
> +unsigned long long __weak get_ticks(void)
> +{
> +	return get_timer(0);
> +}
> +
> +/*
> + * Generic timer implementation of __udelay()
> + */
> +void __weak __udelay(ulong usec)
> +{
> +	unsigned long ticks, limit;
> +
> +	limit = get_ticks() + usec/1000;

Your udelay implementation has 1 msec resolution. It should use the raw
timer to get usec resolution.
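
A sketch of a usec-resolution variant that works on raw counter ticks
(read_cntpct() is a hypothetical helper; get_cntfrq() and do_div() come
from this file):

static inline u64 read_cntpct(void)
{
	u64 cval;

	isb();
	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
	return cval;
}

void __weak __udelay(ulong usec)
{
	u64 ticks = (u64)usec * get_cntfrq();
	u64 end;

	do_div(ticks, 1000000);		/* microseconds -> counter ticks */
	end = read_cntpct() + ticks;

	while (read_cntpct() < end)
		;
}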

> +
> +	do {
> +		ticks = get_ticks();
> +	} while (ticks < limit);
> +}
> diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
> new file mode 100644
> index 0000000..6bb1e1f
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/tlb.S
> @@ -0,0 +1,30 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +
> +/*
> + * void __asm_invalidate_tlb_all(void)
> + *
> + * invalidate all tlb entries.
> + */
> +ENTRY(__asm_invalidate_tlb_all)
> +	switch_el1_el2 x9, 1f, 2f, 3f
> +1:	tlbi	vmalle1
> +	dsb	sy
> +	isb
> +	b	3f
> +2:	tlbi	alle2
> +	dsb	sy
> +	isb
> +3:
> +	ret
> +ENDPROC(__asm_invalidate_tlb_all)
> diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
> new file mode 100644
> index 0000000..328d477
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/u-boot.lds
> @@ -0,0 +1,71 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * (C) Copyright 2002
> + * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
> +OUTPUT_ARCH(aarch64)
> +ENTRY(_start)
> +SECTIONS
> +{
> +	. = 0x00000000;
> +
> +	. = ALIGN(8);
> +	.text :
> +	{
> +		*(.__image_copy_start)
> +		CPUDIR/start.o (.text*)
> +		*(.text*)
> +	}
> +
> +	. = ALIGN(8);
> +	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
> +
> +	. = ALIGN(8);
> +	.data : {
> +		*(.data*)
> +	}
> +
> +	. = ALIGN(8);
> +
> +	. = .;
> +
> +	. = ALIGN(8);
> +	.u_boot_list : {
> +		KEEP(*(SORT(.u_boot_list*)));
> +	}
> +
> +	. = ALIGN(8);
> +	.reloc : {
> +		__rel_got_start = .;
> +		*(.got)
> +		__rel_got_end = .;
> +	}
> +
> +	.image_copy_end :
> +	{
> +		*(.__image_copy_end)
> +	}
> +
> +	_end = .;
> +
> +	. = ALIGN(8);
> +	.bss : {
> +		__bss_start = .;
> +		*(.bss*)
> +		 . = ALIGN(8);
> +		__bss_end = .;
> +	}
> +
> +	/DISCARD/ : { *(.dynsym) }
> +	/DISCARD/ : { *(.dynstr*) }
> +	/DISCARD/ : { *(.dynamic*) }
> +	/DISCARD/ : { *(.plt*) }
> +	/DISCARD/ : { *(.interp*) }
> +	/DISCARD/ : { *(.gnu*) }
> +}
> diff --git a/arch/arm/include/asm/arch-armv8/gpio.h b/arch/arm/include/asm/arch-armv8/gpio.h
> new file mode 100644
> index 0000000..afe7ece
> --- /dev/null
> +++ b/arch/arm/include/asm/arch-armv8/gpio.h
> @@ -0,0 +1,11 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#ifndef _ASM_ARMV8_GPIO_H_
> +#define _ASM_ARMV8_GPIO_H_
> +
> +#endif	/* _ASM_ARMV8_GPIO_H_ */
> diff --git a/arch/arm/include/asm/arch-armv8/mmu.h b/arch/arm/include/asm/arch-armv8/mmu.h
> new file mode 100644
> index 0000000..33b3246
> --- /dev/null
> +++ b/arch/arm/include/asm/arch-armv8/mmu.h
> @@ -0,0 +1,110 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#ifndef _ASM_ARMV8_MMU_H_
> +#define _ASM_ARMV8_MMU_H_
> +
> +#ifdef __ASSEMBLY__
> +#define _AC(X, Y)	X
> +#else
> +#define _AC(X, Y)	(X##Y)
> +#endif
> +
> +#define UL(x)		_AC(x, UL)
> +
> +/***************************************************************/
> +/*
> + * The following definitions are related to each other and should be
> + * calculated consistently.
> + */
> +#define VA_BITS			(39)
> +
> +/* PAGE_SHIFT determines the page size */
> +#undef  PAGE_SIZE
> +#define PAGE_SHIFT		16
> +#define PAGE_SIZE		(1 << PAGE_SHIFT)
> +#define PAGE_MASK		(~(PAGE_SIZE-1))
> +
> +/*
> + * section address mask and size definitions.
> + */
> +#define SECTION_SHIFT		29
> +#define SECTION_SIZE		(UL(1) << SECTION_SHIFT)
> +#define SECTION_MASK		(~(SECTION_SIZE-1))
> +/***************************************************************/
> +
> +/*
> + * Memory types
> + */
> +#define MT_DEVICE_NGNRNE	0
> +#define MT_DEVICE_NGNRE		1
> +#define MT_DEVICE_GRE		2
> +#define MT_NORMAL_NC		3
> +#define MT_NORMAL		4
> +
> +#define MEMORY_ATTRIBUTES	((0x00 << MT_DEVICE_NGNRNE*8) |	\
> +				(0x04 << MT_DEVICE_NGNRE*8) |	\
> +				(0x0c << MT_DEVICE_GRE*8) |	\
> +				(0x44 << MT_NORMAL_NC*8) |	\
> +				(0xff << MT_NORMAL*8))
> +
> +/*
> + * Hardware page table definitions.
> + *
> + * Level 2 descriptor (PMD).
> + */
> +#define PMD_TYPE_MASK		(3 << 0)
> +#define PMD_TYPE_FAULT		(0 << 0)
> +#define PMD_TYPE_TABLE		(3 << 0)
> +#define PMD_TYPE_SECT		(1 << 0)
> +
> +/*
> + * Section
> + */
> +#define PMD_SECT_S		(3 << 8)
> +#define PMD_SECT_AF		(1 << 10)
> +#define PMD_SECT_NG		(1 << 11)
> +#define PMD_SECT_PXN		(UL(1) << 53)
> +#define PMD_SECT_UXN		(UL(1) << 54)
> +
> +/*
> + * AttrIndx[2:0]
> + */
> +#define PMD_ATTRINDX(t)		((t) << 2)
> +#define PMD_ATTRINDX_MASK	(7 << 2)
> +
> +/*
> + * TCR flags.
> + */
> +#define TCR_T0SZ(x)		((64 - (x)) << 0)
> +#define TCR_IRGN_NC		(0 << 8)
> +#define TCR_IRGN_WBWA		(1 << 8)
> +#define TCR_IRGN_WT		(2 << 8)
> +#define TCR_IRGN_WBNWA		(3 << 8)
> +#define TCR_IRGN_MASK		(3 << 8)
> +#define TCR_ORGN_NC		(0 << 10)
> +#define TCR_ORGN_WBWA		(1 << 10)
> +#define TCR_ORGN_WT		(2 << 10)
> +#define TCR_ORGN_WBNWA		(3 << 10)
> +#define TCR_ORGN_MASK		(3 << 10)
> +#define TCR_SHARED_NON		(0 << 12)
> +#define TCR_SHARED_OUTER	(1 << 12)
> +#define TCR_SHARED_INNER	(2 << 12)
> +#define TCR_TG0_4K		(0 << 14)
> +#define TCR_TG0_64K		(1 << 14)
> +#define TCR_TG0_16K		(2 << 14)
> +#define TCR_EL1_IPS_40BIT	(2 << 32)
> +#define TCR_EL2_IPS_40BIT	(2 << 16)
> +
> +/* PTWs cacheable, inner/outer WBWA and non-shareable */
> +#define TCR_FLAGS		(TCR_TG0_64K |		\
> +				TCR_SHARED_NON |	\
> +				TCR_ORGN_WBWA |		\
> +				TCR_IRGN_WBWA |		\
> +				TCR_T0SZ(VA_BITS))
> +
> +#endif /* _ASM_ARMV8_MMU_H_ */
> diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
> index c3489f1..71a9966 100644
> --- a/arch/arm/include/asm/byteorder.h
> +++ b/arch/arm/include/asm/byteorder.h
> @@ -23,10 +23,22 @@
>  #  define __SWAB_64_THRU_32__
>  #endif
>  
> +#ifdef	CONFIG_ARM64
> +
> +#ifdef __AARCH64EB__
> +#include <linux/byteorder/big_endian.h>
> +#else
> +#include <linux/byteorder/little_endian.h>
> +#endif
> +
> +#else	/* CONFIG_ARM64 */
> +
>  #ifdef __ARMEB__
>  #include <linux/byteorder/big_endian.h>
>  #else
>  #include <linux/byteorder/little_endian.h>
>  #endif
>  
> +#endif	/* CONFIG_ARM64 */
> +
>  #endif
> diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
> index 6d60a4a..ddebbc8 100644
> --- a/arch/arm/include/asm/cache.h
> +++ b/arch/arm/include/asm/cache.h
> @@ -11,6 +11,8 @@
>  
>  #include <asm/system.h>
>  
> +#ifndef CONFIG_ARM64
> +
>  /*
>   * Invalidate L2 Cache using co-proc instruction
>   */
> @@ -28,6 +30,9 @@ void l2_cache_disable(void);
>  void set_section_dcache(int section, enum dcache_option option);
>  
>  void dram_bank_mmu_setup(int bank);
> +
> +#endif
> +
>  /*
>   * The current upper bound for ARM L1 data cache line sizes is 64 bytes.  We
>   * use that value for aligning DMA buffers unless the board config has specified
> diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
> index 99b703e..0ee131d 100644
> --- a/arch/arm/include/asm/config.h
> +++ b/arch/arm/include/asm/config.h
> @@ -9,4 +9,14 @@
>  
>  #define CONFIG_LMB
>  #define CONFIG_SYS_BOOT_RAMDISK_HIGH
> +
> +#ifdef CONFIG_ARM64
> +/*
> + * Currently the GOT is used to relocate U-Boot, so the
> + * CONFIG_NEEDS_MANUAL_RELOC option is needed.
> + */
> +#define CONFIG_NEEDS_MANUAL_RELOC
> +#define CONFIG_PHYS_64BIT
> +#endif
> +
>  #endif
> diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
> index 79a9597..30a338e 100644
> --- a/arch/arm/include/asm/global_data.h
> +++ b/arch/arm/include/asm/global_data.h
> @@ -47,6 +47,10 @@ struct arch_global_data {
>  
>  #include <asm-generic/global_data.h>
>  
> -#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r8")
> +#ifdef CONFIG_ARM64
> +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("x18")
> +#else
> +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("r8")
> +#endif
>  
>  #endif /* __ASM_GBL_DATA_H */
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index 1fbc531..6a1f05a 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -75,42 +75,45 @@ static inline phys_addr_t virt_to_phys(void * vaddr)
>  #define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
>  #define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
>  
> -extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen)
> +extern inline void __raw_writesb(unsigned long addr, const void *data,
> +				 int bytelen)
>  {
>  	uint8_t *buf = (uint8_t *)data;
>  	while(bytelen--)
>  		__arch_putb(*buf++, addr);
>  }
>  
> -extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen)
> +extern inline void __raw_writesw(unsigned long addr, const void *data,
> +				 int wordlen)
>  {
>  	uint16_t *buf = (uint16_t *)data;
>  	while(wordlen--)
>  		__arch_putw(*buf++, addr);
>  }
>  
> -extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen)
> +extern inline void __raw_writesl(unsigned long addr, const void *data,
> +				 int longlen)
>  {
>  	uint32_t *buf = (uint32_t *)data;
>  	while(longlen--)
>  		__arch_putl(*buf++, addr);
>  }
>  
> -extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
> +extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
>  {
>  	uint8_t *buf = (uint8_t *)data;
>  	while(bytelen--)
>  		*buf++ = __arch_getb(addr);
>  }
>  
> -extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
> +extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
>  {
>  	uint16_t *buf = (uint16_t *)data;
>  	while(wordlen--)
>  		*buf++ = __arch_getw(addr);
>  }
>  
> -extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
> +extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
>  {
>  	uint32_t *buf = (uint32_t *)data;
>  	while(longlen--)
> diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
> index ff13f36..db8869e 100644
> --- a/arch/arm/include/asm/macro.h
> +++ b/arch/arm/include/asm/macro.h
> @@ -54,5 +54,44 @@
>  	bcs	1b
>  .endm
>  
> +#ifdef CONFIG_ARM64
> +/*
> + * Register aliases.
> + */
> +lr	.req	x30
> +
> +/*
> + * Branch according to exception level
> + */
> +.macro	switch_el1_el2, xreg, el1_label, el2_label, fail_label
> +	mrs	\xreg, CurrentEL
> +	cmp	\xreg, 0x4
> +	b.eq	\el1_label
> +	cmp	\xreg, 0x8
> +	b.eq	\el2_label
> +	b	\fail_label
> +.endm
> +
> +/*
> + * Branch if current processor is a slave,
> + * choose processor with all zero affinity value as the master.
> + */
> +.macro	branch_if_slave, xreg, slave_label
> +	mrs	\xreg, mpidr_el1
> +	tst	\xreg, #0xff		/* Test Affinity 0 */
> +	b.ne	\slave_label
> +	lsr	\xreg, \xreg, #8
> +	tst	\xreg, #0xff		/* Test Affinity 1 */
> +	b.ne	\slave_label
> +	lsr	\xreg, \xreg, #8
> +	tst	\xreg, #0xff		/* Test Affinity 2 */
> +	b.ne	\slave_label
> +	lsr	\xreg, \xreg, #16
> +	tst	\xreg, #0xff		/* Test Affinity 3 */
> +	b.ne	\slave_label
> +.endm
> +
> +#endif /* CONFIG_ARM64 */
> +
>  #endif /* __ASSEMBLY__ */
>  #endif /* __ASM_ARM_MACRO_H__ */
> diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
> index c412486..9ba9add 100644
> --- a/arch/arm/include/asm/posix_types.h
> +++ b/arch/arm/include/asm/posix_types.h
> @@ -13,6 +13,8 @@
>  #ifndef __ARCH_ARM_POSIX_TYPES_H
>  #define __ARCH_ARM_POSIX_TYPES_H
>  
> +#include <config.h>
> +
>  /*
>   * This file is generally used by user-level software, so you need to
>   * be a little careful about namespace pollution etc.  Also, we cannot
> @@ -28,9 +30,17 @@ typedef int			__kernel_pid_t;
>  typedef unsigned short		__kernel_ipc_pid_t;
>  typedef unsigned short		__kernel_uid_t;
>  typedef unsigned short		__kernel_gid_t;
> +
> +#ifdef	CONFIG_ARM64
> +typedef unsigned long		__kernel_size_t;
> +typedef long			__kernel_ssize_t;
> +typedef long			__kernel_ptrdiff_t;
> +#else	/* CONFIG_ARM64 */
>  typedef unsigned int		__kernel_size_t;
>  typedef int			__kernel_ssize_t;
>  typedef int			__kernel_ptrdiff_t;
> +#endif	/* CONFIG_ARM64 */
> +
>  typedef long			__kernel_time_t;
>  typedef long			__kernel_suseconds_t;
>  typedef long			__kernel_clock_t;
> diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h
> index 79cc644..fd280cb 100644
> --- a/arch/arm/include/asm/proc-armv/ptrace.h
> +++ b/arch/arm/include/asm/proc-armv/ptrace.h
> @@ -12,6 +12,25 @@
>  
>  #include <linux/config.h>
>  
> +#ifdef CONFIG_ARM64
> +
> +#define PCMASK		0
> +
> +#ifndef __ASSEMBLY__
> +
> +/*
> + * This struct defines the way the registers are stored
> + * on the stack during an exception.
> + */
> +struct pt_regs {
> +	unsigned long elr;
> +	unsigned long regs[31];
> +};
> +
> +#endif	/* __ASSEMBLY__ */
> +
> +#else	/* CONFIG_ARM64 */
> +
>  #define USR26_MODE	0x00
>  #define FIQ26_MODE	0x01
>  #define IRQ26_MODE	0x02
> @@ -106,4 +125,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
>  
>  #endif	/* __ASSEMBLY__ */
>  
> +#endif	/* CONFIG_ARM64 */
> +
>  #endif
> diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h
> index b4cfa68..19b2b44 100644
> --- a/arch/arm/include/asm/proc-armv/system.h
> +++ b/arch/arm/include/asm/proc-armv/system.h
> @@ -15,6 +15,60 @@
>  /*
>   * Save the current interrupt enable state & disable IRQs
>   */
> +#ifdef CONFIG_ARM64
> +
> +/*
> + * Save the current interrupt enable state
> + * and disable IRQs/FIQs
> + */
> +#define local_irq_save(flags)					\
> +	({							\
> +	asm volatile(						\
> +	"mrs	%0, daif\n"					\
> +	"msr	daifset, #3"					\
> +	: "=r" (flags)						\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +/*
> + * restore saved IRQ & FIQ state
> + */
> +#define local_irq_restore(flags)				\
> +	({							\
> +	asm volatile(						\
> +	"msr	daif, %0"					\
> +	:							\
> +	: "r" (flags)						\
> +	: "memory");						\
> +	})
> +
> +/*
> + * Enable IRQs/FIQs
> + */
> +#define local_irq_enable()					\
> +	({							\
> +	asm volatile(						\
> +	"msr	daifclr, #3"					\
> +	:							\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +/*
> + * Disable IRQs/FIQs
> + */
> +#define local_irq_disable()					\
> +	({							\
> +	asm volatile(						\
> +	"msr	daifset, #3"					\
> +	:							\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +#else	/* CONFIG_ARM64 */
> +
>  #define local_irq_save(x)					\
>  	({							\
>  		unsigned long temp;				\
> @@ -109,7 +163,10 @@
>  	: "r" (x)						\
>  	: "memory")
>  
> -#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
> +#endif	/* CONFIG_ARM64 */
> +
> +#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \
> +	defined(CONFIG_ARM64)
>  /*
>   * On the StrongARM, "swp" is terminally broken since it bypasses the
>   * cache totally.  This means that the cache becomes inconsistent, and,
> diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> index 760345f..e79f790 100644
> --- a/arch/arm/include/asm/system.h
> +++ b/arch/arm/include/asm/system.h
> @@ -1,6 +1,79 @@
>  #ifndef __ASM_ARM_SYSTEM_H
>  #define __ASM_ARM_SYSTEM_H
>  
> +#ifdef CONFIG_ARM64
> +
> +/*
> + * SCTLR_EL2 bits definitions
> + */
> +#define CR_M		(1 << 0)	/* MMU enable			*/
> +#define CR_A		(1 << 1)	/* Alignment abort enable	*/
> +#define CR_C		(1 << 2)	/* Dcache enable		*/
> +#define CR_SA		(1 << 3)	/* Stack Alignment Check Enable	*/
> +#define CR_I		(1 << 12)	/* Icache enable		*/
> +#define CR_WXN		(1 << 19)	/* Write Permission Implies XN	*/
> +#define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
> +
> +#define PGTABLE_SIZE	(0x10000)
> +
> +#ifndef __ASSEMBLY__
> +
> +#define isb()				\
> +	({asm volatile(			\
> +	"isb" : : : "memory");		\
> +	})
> +
> +#define wfi()				\
> +	({asm volatile(			\
> +	"wfi" : : : "memory");		\
> +	})
> +
> +static inline unsigned int curent_el(void)
> +{
> +	unsigned int el;
> +	asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
> +	return el >> 2;
> +}
> +
> +static inline unsigned int get_sctlr(void)
> +{
> +	unsigned int el, val;
> +
> +	el = curent_el();
> +	if (el == 1)
> +		asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
> +	else if (el == 2)
> +		asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
> +	else
> +		panic("Not Supported Exception Level");
> +
> +	return val;
> +}
> +
> +static inline void set_sctlr(unsigned int val)
> +{
> +	unsigned int el;
> +
> +	el = curent_el();
> +	if (el == 1)
> +		asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
> +	else if (el == 2)
> +		asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
> +	else
> +		panic("Not Supported Exception Level");
> +
> +	asm volatile("isb");
> +}
> +
> +void __asm_flush_dcache_all(void);
> +void __asm_flush_dcache_range(u64 start, u64 end);
> +void __asm_invalidate_tlb_all(void);
> +void __asm_invalidate_icache_all(void);
> +
> +#endif	/* __ASSEMBLY__ */
> +
> +#else /* CONFIG_ARM64 */
> +
>  #ifdef __KERNEL__
>  
>  #define CPU_ARCH_UNKNOWN	0
> @@ -45,6 +118,8 @@
>  #define CR_AFE	(1 << 29)	/* Access flag enable			*/
>  #define CR_TE	(1 << 30)	/* Thumb exception enable		*/
>  
> +#define PGTABLE_SIZE		(4096 * 4)
> +
>  /*
>   * This is used to ensure the compiler did actually allocate the register we
>   * asked it for some inline assembly sequences.  Apparently we can't trust
> @@ -132,4 +207,6 @@ void mmu_page_table_flush(unsigned long start, unsigned long stop);
>  
>  #endif /* __KERNEL__ */
>  
> +#endif /* CONFIG_ARM64 */
> +
>  #endif
> diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
> index 71dc049..2326420 100644
> --- a/arch/arm/include/asm/types.h
> +++ b/arch/arm/include/asm/types.h
> @@ -39,7 +39,11 @@ typedef unsigned int u32;
>  typedef signed long long s64;
>  typedef unsigned long long u64;
>  
> +#ifdef	CONFIG_ARM64
> +#define BITS_PER_LONG 64
> +#else	/* CONFIG_ARM64 */
>  #define BITS_PER_LONG 32
> +#endif	/* CONFIG_ARM64 */
>  
>  /* Dma addresses are 32-bits wide.  */
>  
> diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
> index 2b5fce8..cb81232 100644
> --- a/arch/arm/include/asm/u-boot.h
> +++ b/arch/arm/include/asm/u-boot.h
> @@ -44,6 +44,10 @@ typedef struct bd_info {
>  #endif /* !CONFIG_SYS_GENERIC_BOARD */
>  
>  /* For image.h:image_check_target_arch() */
> +#ifndef CONFIG_ARM64
>  #define IH_ARCH_DEFAULT IH_ARCH_ARM
> +#else
> +#define IH_ARCH_DEFAULT IH_ARCH_ARM64
> +#endif
>  
>  #endif	/* _U_BOOT_H_ */
> diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
> index 44593a8..0a228fb 100644
> --- a/arch/arm/include/asm/unaligned.h
> +++ b/arch/arm/include/asm/unaligned.h
> @@ -8,7 +8,7 @@
>  /*
>   * Select endianness
>   */
> -#ifndef __ARMEB__
> +#if __BYTE_ORDER == __LITTLE_ENDIAN
>  #define get_unaligned	__get_unaligned_le
>  #define put_unaligned	__put_unaligned_le
>  #else
> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index 4e78723..03c31c7 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -10,6 +10,9 @@ include $(TOPDIR)/config.mk
>  LIB	= $(obj)lib$(ARCH).o
>  LIBGCC	= $(obj)libgcc.o
>  
> +ifdef CONFIG_ARM64
> +SOBJS-y += crt0_64.o
> +else
>  GLSOBJS	+= _ashldi3.o
>  GLSOBJS	+= _ashrdi3.o
>  GLSOBJS	+= _divsi3.o
> @@ -21,9 +24,14 @@ GLSOBJS	+= _umodsi3.o
>  GLCOBJS	+= div0.o
>  
>  SOBJS-y += crt0.o
> +endif
>  
>  ifndef CONFIG_SPL_BUILD
> +ifdef CONFIG_ARM64
> +SOBJS-y += relocate_64.o
> +else
>  SOBJS-y += relocate.o
> +endif
>  ifndef CONFIG_SYS_GENERIC_BOARD
>  COBJS-y	+= board.o
>  endif
> @@ -38,11 +46,17 @@ else
>  COBJS-$(CONFIG_SPL_FRAMEWORK) += spl.o
>  endif
>  
> +ifdef CONFIG_ARM64
> +COBJS-y	+= interrupts_64.o
> +else
>  COBJS-y	+= interrupts.o
> +endif
>  COBJS-y	+= reset.o
>  
>  COBJS-y	+= cache.o
> +ifndef CONFIG_ARM64
>  COBJS-y	+= cache-cp15.o
> +endif
>  
>  SRCS	:= $(GLSOBJS:.o=.S) $(GLCOBJS:.o=.c) \
>  	   $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
> diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
> index 34f50b0..d6d0833 100644
> --- a/arch/arm/lib/board.c
> +++ b/arch/arm/lib/board.c
> @@ -344,7 +344,7 @@ void board_init_f(ulong bootflag)
>  
>  #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
>  	/* reserve TLB table */
> -	gd->arch.tlb_size = 4096 * 4;
> +	gd->arch.tlb_size = PGTABLE_SIZE;
>  	addr -= gd->arch.tlb_size;
>  
>  	/* round down to next 64 kB limit */
> @@ -419,6 +419,7 @@ void board_init_f(ulong bootflag)
>  	}
>  #endif
>  
> +#ifndef CONFIG_ARM64
>  	/* setup stackpointer for exeptions */
>  	gd->irq_sp = addr_sp;
>  #ifdef CONFIG_USE_IRQ
> @@ -431,6 +432,10 @@ void board_init_f(ulong bootflag)
>  
>  	/* 8-byte alignment for ABI compliance */
>  	addr_sp &= ~0x07;
> +#else	/* CONFIG_ARM64 */
> +	/* 16-byte alignment for ABI compliance */
> +	addr_sp &= ~0x0f;
> +#endif	/* CONFIG_ARM64 */
>  #else
>  	addr_sp += 128;	/* leave 32 words for abort-stack   */
>  	gd->irq_sp = addr_sp;
> @@ -513,7 +518,15 @@ void board_init_r(gd_t *id, ulong dest_addr)
>  	ulong flash_size;
>  #endif
>  
> +	/*
> +	 * Relocate the serial_device routines first so that
> +	 * printf accesses the correct puts function. This is critical
> +	 * when CONFIG_NEEDS_MANUAL_RELOC is needed.
> +	 */
> +	serial_initialize();
> +
>  	gd->flags |= GD_FLG_RELOC;	/* tell others: relocation done */
> +
>  	bootstage_mark_name(BOOTSTAGE_ID_START_UBOOT_R, "board_init_r");
>  
>  	monitor_flash_len = _end_ofs;
> @@ -523,6 +536,15 @@ void board_init_r(gd_t *id, ulong dest_addr)
>  
>  	debug("monitor flash len: %08lX\n", monitor_flash_len);
>  	board_init();	/* Setup chipselects */
> +
> +#ifdef CONFIG_NEEDS_MANUAL_RELOC
> +	/*
> +	 * We have to relocate the command table manually
> +	 */
> +	fixup_cmdtable(ll_entry_start(cmd_tbl_t, cmd),
> +			ll_entry_count(cmd_tbl_t, cmd));
> +#endif /* CONFIG_NEEDS_MANUAL_RELOC */
> +
>  	/*
>  	 * TODO: printing of the clock inforamtion of the board is now
>  	 * implemented as part of bdinfo command. Currently only support for
> @@ -532,7 +554,6 @@ void board_init_r(gd_t *id, ulong dest_addr)
>  #ifdef CONFIG_CLOCKS
>  	set_cpu_clk_info(); /* Setup clock information */
>  #endif
> -	serial_initialize();
>  
>  	debug("Now running in RAM - U-Boot at: %08lx\n", dest_addr);
>  
> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
> index eefb456..26e85f0 100644
> --- a/arch/arm/lib/bootm.c
> +++ b/arch/arm/lib/bootm.c
> @@ -222,6 +222,21 @@ static void boot_prep_linux(bootm_headers_t *images)
>  /* Subcommand: GO */
>  static void boot_jump_linux(bootm_headers_t *images, int flag)
>  {
> +#ifdef CONFIG_ARM64
> +	void (*kernel_entry)(void *fdt_addr);
> +	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
> +
> +	kernel_entry = (void (*)(void *fdt_addr))images->ep;
> +
> +	debug("## Transferring control to Linux (at address %lx)...\n",
> +		(ulong) kernel_entry);
> +	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
> +
> +	announce_and_cleanup(fake);
> +
> +	if (!fake)
> +		kernel_entry(images->ft_addr);
> +#else
>  	unsigned long machid = gd->bd->bi_arch_number;
>  	char *s;
>  	void (*kernel_entry)(int zero, int arch, uint params);
> @@ -248,6 +263,7 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
>  
>  	if (!fake)
>  		kernel_entry(0, machid, r2);
> +#endif
>  }
>  
>  /* Main Entry point for arm bootm implementation
> diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
> new file mode 100644
> index 0000000..ddd46eb
> --- /dev/null
> +++ b/arch/arm/lib/crt0_64.S
> @@ -0,0 +1,116 @@
> +/*
> + * crt0 - C-runtime startup Code for AArch64 U-Boot
> + *
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * (C) Copyright 2012
> + * Albert ARIBAUD <albert.u.boot@aribaud.net>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <config.h>
> +#include <asm-offsets.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * This file handles the target-independent stages of the U-Boot
> + * start-up where a C runtime environment is needed. Its entry point
> + * is _main and is branched into from the target's start.S file.
> + *
> + * _main execution sequence is:
> + *
> + * 1. Set up initial environment for calling board_init_f().
> + *    This environment only provides a stack and a place to store
> + *    the GD ('global data') structure, both located in some readily
> + *    available RAM (SRAM, locked cache...). In this context, VARIABLE
> + *    global data, initialized or not (BSS), are UNAVAILABLE; only
> + *    CONSTANT initialized data are available.
> + *
> + * 2. Call board_init_f(). This function prepares the hardware for
> + *    execution from system RAM (DRAM, DDR...) As system RAM may not
> + *    be available yet, board_init_f() must use the current GD to
> + *    store any data which must be passed on to later stages. These
> + *    data include the relocation destination, the future stack, and
> + *    the future GD location.
> + *
> + * (the following applies only to non-SPL builds)
> + *
> + * 3. Set up intermediate environment where the stack and GD are the
> + *    ones allocated by board_init_f() in system RAM, but BSS and
> + *    initialized non-const data are still not available.
> + *
> + * 4. Call relocate_code(). This function relocates U-Boot from its
> + *    current location into the relocation destination computed by
> + *    board_init_f().
> + *
> + * 5. Set up final environment for calling board_init_r(). This
> + *    environment has BSS (initialized to 0), initialized non-const
> + *    data (initialized to their intended value), and stack in system
> + *    RAM. GD has retained values set by board_init_f(). Some CPUs
> + *    have some work left to do at this point regarding memory, so
> + *    call c_runtime_cpu_setup.
> + *
> + * 6. Branch to board_init_r().
> + */
> +
> +ENTRY(_main)
> +
> +/*
> + * Set up initial C runtime environment and call board_init_f(0).
> + */
> +	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
> +	sub	x0, x0, #GD_SIZE	/* allocate one GD above SP */
> +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> +	mov	x18, sp			/* GD is above SP */
> +	mov	x0, #0
> +	bl	board_init_f
> +
> +/*
> + * Set up intermediate environment (new sp and gd) and call
> + * relocate_code(addr_moni). Trick here is that we'll return
> + * 'here' but relocated.
> + */
> +	ldr	x0, [x18, #GD_START_ADDR_SP]	/* x0 <- gd->start_addr_sp */
> +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> +	ldr	x18, [x18, #GD_BD]		/* x18 <- gd->bd */
> +	sub	x18, x18, #GD_SIZE		/* new GD is below bd */
> +
> +	adr	lr, relocation_return
> +	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
> +	add	lr, lr, x9	/* new return address after relocation */
> +	ldr	x0, [x18, #GD_RELOCADDR]	/* x0 <- gd->relocaddr */
> +	b	relocate_code
> +
> +relocation_return:
> +
> +/*
> + * Set up final (full) environment
> + */
> +	bl	c_runtime_cpu_setup		/* still call old routine */
> +
> +/*
> + * Clear BSS section
> + */
> +	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
> +	ldr	x0, =__bss_start
> +	add	x0, x0, x9			/* x0 <- __bss_start in RAM */
> +	ldr	x1, =__bss_end
> +	add	x1, x1, x9			/* x1 <- __bss_end in RAM */
> +	mov	x2, #0
> +clear_loop:
> +	str	x2, [x0]
> +	add	x0, x0, #8
> +	cmp	x0, x1
> +	b.lo	clear_loop
> +
> +	/* call board_init_r(gd_t *id, ulong dest_addr) */
> +	mov	x0, x18				/* gd_t */
> +	ldr	x1, [x18, #GD_RELOCADDR]	/* dest_addr */
> +	b	board_init_r			/* PC relative jump */
> +
> +	/* NOTREACHED - board_init_r() does not return */
> +
> +ENDPROC(_main)
> diff --git a/arch/arm/lib/interrupts_64.c b/arch/arm/lib/interrupts_64.c
> new file mode 100644
> index 0000000..b476722
> --- /dev/null
> +++ b/arch/arm/lib/interrupts_64.c
> @@ -0,0 +1,120 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <linux/compiler.h>
> +
> +
> +int interrupt_init(void)
> +{
> +	return 0;
> +}
> +
> +void enable_interrupts(void)
> +{
> +	return;
> +}
> +
> +int disable_interrupts(void)
> +{
> +	return 0;
> +}
> +
> +void show_regs(struct pt_regs *regs)
> +{
> +	int i;
> +
> +	printf("ELR:     %lx\n", regs->elr);
> +	printf("LR:      %lx\n", regs->regs[30]);
> +	for (i = 0; i < 29; i += 2)
> +		printf("x%-2d: %016lx x%-2d: %016lx\n",
> +		       i, regs->regs[i], i+1, regs->regs[i+1]);
> +	printf("\n");
> +}
> +
> +/*
> + * do_bad_sync handles the impossible case in the Synchronous Abort vector.
> + */
> +void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_irq handles the impossible case in the Irq vector.
> + */
> +void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_fiq handles the impossible case in the Fiq vector.
> + */
> +void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_error handles the impossible case in the Error vector.
> + */
> +void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_sync handles the Synchronous Abort exception.
> + */
> +void do_sync(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_irq handles the Irq exception.
> + */
> +void do_irq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Irq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_fiq handles the Fiq exception.
> + */
> +void do_fiq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Fiq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_error handles the Error exception.
> + * Errors are more likely to be processor specific, so do_error
> + * is defined with the weak attribute and can be redefined
> + * in processor specific code.
> + */
> +void __weak do_error(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Error\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S
> new file mode 100644
> index 0000000..29c3239
> --- /dev/null
> +++ b/arch/arm/lib/relocate_64.S
> @@ -0,0 +1,57 @@
> +/*
> + * relocate - common relocation function for AArch64 U-Boot
> + *
> + * (C) Copyright 2013
> + * Albert ARIBAUD <albert.u.boot@aribaud.net>
> + * David Feng <fenghua@phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * void relocate_code (addr_moni)
> + *
> + * This function relocates the monitor code.
> + *
> + * NOTE:
> + * GOT is used and configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
> + */
> +ENTRY(relocate_code)
> +	/*
> +	 * Copy u-boot from flash to RAM
> +	 */
> +	ldr	x1, =__image_copy_start	/* x1 <- copy source */
> +	cmp	x1, x0
> +	b.eq	relocate_done		/* skip relocation */
> +	mov	x2, x0			/* x2 <- copy destination */
> +	ldr	x3, =__image_copy_end	/* x3 <- source end address */
> +
> +copy_loop:
> +	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
> +	stp	x10, x11, [x2], #16	/* copy to   target address [x2] */
> +	cmp	x1, x3			/* until source end address [x3] */
> +	b.lo	copy_loop
> +
> +	/*
> +	 * Fix .reloc relocations
> +	 */
> +	ldr	x9, [x18, #GD_RELOC_OFF]/* x9 <- relocation offset */
> +	ldr	x1, =__rel_got_start	/* x1 <- rel got start ofs */
> +	add	x1, x1, x9		/* x1 <- rel got start in RAM */
> +	ldr	x2, =__rel_got_end	/* x2 <- rel got end ofs */
> +	add	x2, x2, x9		/* x2 <- rel got end in RAM */
> +fixloop:
> +	ldr	x10, [x1]
> +	add	x10, x10, x9		/* x10 <- address to be fixed up */
> +	str	x10, [x1]
> +	add	x1, x1, #8		/* each got entry is 8 bytes */
> +	cmp	x1, x2
> +	b.lo	fixloop
> +
> +relocate_done:
> +	ret
> +ENDPROC(relocate_code)
> diff --git a/common/image.c b/common/image.c
> index b0ae58f..4145354 100644
> --- a/common/image.c
> +++ b/common/image.c
> @@ -81,6 +81,7 @@ static const table_entry_t uimage_arch[] = {
>  	{	IH_ARCH_NDS32,		"nds32",	"NDS32",	},
>  	{	IH_ARCH_OPENRISC,	"or1k",		"OpenRISC 1000",},
>  	{	IH_ARCH_SANDBOX,	"sandbox",	"Sandbox",	},
> +	{	IH_ARCH_ARM64,		"arm64",	"AArch64",	},
>  	{	-1,			"",		"",		},
>  };
>  
> diff --git a/doc/README.arm64 b/doc/README.arm64
> new file mode 100644
> index 0000000..746ce6a
> --- /dev/null
> +++ b/doc/README.arm64
> @@ -0,0 +1,33 @@
> +U-boot for arm64
> +
> +Summary
> +=======
> +No arm64 hardware platform is available yet. U-boot is simulated on
> +the Foundation Model and Fast Model for ARMv8.
> +
> +Notes
> +=====
> +
> +1. Currenly, u-boot could be running at EL1 or EL2.

s/Currenly/Currently/

> +
> +2. Currenly, U-boot for arm64 is compiled with AArch64-gcc. AArch64-gcc

s/Currenly/Currently/

> +   uses the rela relocation format, so it cannot be relocated at run time.
> +   Therefore, GOT is used to relocate u-boot and CONFIG_NEEDS_MANUAL_RELOC
> +   is needed.
> +
> +3. The fdt should be placed at a 2-megabyte boundary and within the first
> +   512 megabytes from the start of the kernel image, so fdt_high should be
> +   defined accordingly.
> +   Please refer to linux/Documentation/arm64/booting.txt for details.
> +
> +4. Generic board is supported.
> +
> +5. CONFIG_ARM64, instead of CONFIG_ARMV8, is used to distinguish aarch64-
> +   and aarch32-specific code.
> +
> +Contributor
> +===========
> +   Tom Rini       <trini@ti.com>
> +   Scott Wood     <scottwood@freescale.com>
> +   Simon Glass    <sjg@chromium.org>
> +   Sharma Bhupesh <bhupesh.sharma@freescale.com>
> +   Rob Herring    <robherring2@gmail.com>
> diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
> index 8fb1765..fc5d7ef 100644
> --- a/examples/standalone/stubs.c
> +++ b/examples/standalone/stubs.c
> @@ -39,6 +39,20 @@ gd_t *global_data;
>  "	bctr\n"				\
>  	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
>  #elif defined(CONFIG_ARM)
> +#ifdef CONFIG_ARM64
> +/*
> + * x18 holds the pointer to the global_data, x9 is a call-clobbered
> + * register
> + */
> +#define EXPORT_FUNC(x) \
> +	asm volatile (			\
> +"	.globl " #x "\n"		\
> +#x ":\n"				\
> +"	ldr	x9, [x18, %0]\n"		\
> +"	ldr	x9, [x9, %1]\n"		\
> +"	br	x9\n"		\
> +	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9");
> +#else
>  /*
>   * r8 holds the pointer to the global_data, ip is a call-clobbered
>   * register
> @@ -50,6 +64,7 @@ gd_t *global_data;
>  "	ldr	ip, [r8, %0]\n"		\
>  "	ldr	pc, [ip, %1]\n"		\
>  	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
> +#endif
>  #elif defined(CONFIG_MIPS)
>  /*
>   * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
> diff --git a/include/image.h b/include/image.h
> index ee6eb8d..7de2bb2 100644
> --- a/include/image.h
> +++ b/include/image.h
> @@ -156,6 +156,7 @@ struct lmb;
>  #define IH_ARCH_SANDBOX		19	/* Sandbox architecture (test only) */
>  #define IH_ARCH_NDS32	        20	/* ANDES Technology - NDS32  */
>  #define IH_ARCH_OPENRISC        21	/* OpenRISC 1000  */
> +#define IH_ARCH_ARM64		22	/* ARM64	*/
>  
>  /*
>   * Image Types
>
Rob Herring Oct. 3, 2013, 9:51 p.m. UTC | #2
On 10/03/2013 04:35 PM, Rob Herring wrote:
> On 09/26/2013 08:35 AM, fenghua@phytium.com.cn wrote:
>> From: David Feng <fenghua@phytium.com.cn>
>>
>> Signed-off-by: David Feng <fenghua@phytium.com.cn>
>> ---
>>  arch/arm/config.mk                      |    4 +
> 
> FYI, some recent mainline changes to config.mk break the build for me.
> 
>>  arch/arm/cpu/armv8/Makefile             |   38 +++++
>>  arch/arm/cpu/armv8/cache.S              |  130 +++++++++++++++++
>>  arch/arm/cpu/armv8/cache_v8.c           |  218 ++++++++++++++++++++++++++++
>>  arch/arm/cpu/armv8/config.mk            |   16 +++
>>  arch/arm/cpu/armv8/cpu.c                |   67 +++++++++
>>  arch/arm/cpu/armv8/exceptions.S         |  115 +++++++++++++++
>>  arch/arm/cpu/armv8/start.S              |  234 +++++++++++++++++++++++++++++++
>>  arch/arm/cpu/armv8/timer.c              |   80 +++++++++++
>>  arch/arm/cpu/armv8/tlb.S                |   30 ++++
>>  arch/arm/cpu/armv8/u-boot.lds           |   71 ++++++++++
>>  arch/arm/include/asm/arch-armv8/gpio.h  |   11 ++
>>  arch/arm/include/asm/arch-armv8/mmu.h   |  110 +++++++++++++++

Also, this directory is mutually exclusive with arch-${soc}. I think
this should be moved up a level. gpio.h is probably fine as a default
version.

Rob
York Sun Oct. 3, 2013, 9:54 p.m. UTC | #3
On 10/03/2013 02:51 PM, Rob Herring wrote:
> On 10/03/2013 04:35 PM, Rob Herring wrote:
>> On 09/26/2013 08:35 AM, fenghua@phytium.com.cn wrote:
>>> From: David Feng <fenghua@phytium.com.cn>
>>>
>>> Signed-off-by: David Feng <fenghua@phytium.com.cn>
>>> ---
>>>  arch/arm/config.mk                      |    4 +
>>
>> FYI, some recent mainline changes to config.mk break the build for me.
>>
>>>  arch/arm/cpu/armv8/Makefile             |   38 +++++
>>>  arch/arm/cpu/armv8/cache.S              |  130 +++++++++++++++++
>>>  arch/arm/cpu/armv8/cache_v8.c           |  218 ++++++++++++++++++++++++++++
>>>  arch/arm/cpu/armv8/config.mk            |   16 +++
>>>  arch/arm/cpu/armv8/cpu.c                |   67 +++++++++
>>>  arch/arm/cpu/armv8/exceptions.S         |  115 +++++++++++++++
>>>  arch/arm/cpu/armv8/start.S              |  234 +++++++++++++++++++++++++++++++
>>>  arch/arm/cpu/armv8/timer.c              |   80 +++++++++++
>>>  arch/arm/cpu/armv8/tlb.S                |   30 ++++
>>>  arch/arm/cpu/armv8/u-boot.lds           |   71 ++++++++++
>>>  arch/arm/include/asm/arch-armv8/gpio.h  |   11 ++
>>>  arch/arm/include/asm/arch-armv8/mmu.h   |  110 +++++++++++++++
> 
> Also, this directory is mutually exclusive with arch-${soc}. I think
> this should be moved up a level. gpio.h is probably fine as a default
> version.
> 

I am also concerned about gpio.h. I am forced to create a gpio.h file
even if I don't use it when I add an SoC for armv8.

York
Tom Rini Oct. 3, 2013, 9:56 p.m. UTC | #4

On 10/03/2013 05:54 PM, York Sun wrote:
> On 10/03/2013 02:51 PM, Rob Herring wrote:
>> On 10/03/2013 04:35 PM, Rob Herring wrote:
>>> On 09/26/2013 08:35 AM, fenghua@phytium.com.cn wrote:
>>>> From: David Feng <fenghua@phytium.com.cn>
>>>>
>>>> Signed-off-by: David Feng <fenghua@phytium.com.cn>
>>>> ---
>>>>  arch/arm/config.mk                      |    4 +
>>>
>>> FYI, some recent mainline changes to config.mk break the build for me.
>>>
>>>>  arch/arm/cpu/armv8/Makefile             |   38 +++++
>>>>  arch/arm/cpu/armv8/cache.S              |  130 +++++++++++++++++
>>>>  arch/arm/cpu/armv8/cache_v8.c           |  218 ++++++++++++++++++++++++++++
>>>>  arch/arm/cpu/armv8/config.mk            |   16 +++
>>>>  arch/arm/cpu/armv8/cpu.c                |   67 +++++++++
>>>>  arch/arm/cpu/armv8/exceptions.S         |  115 +++++++++++++++
>>>>  arch/arm/cpu/armv8/start.S              |  234 +++++++++++++++++++++++++++++++
>>>>  arch/arm/cpu/armv8/timer.c              |   80 +++++++++++
>>>>  arch/arm/cpu/armv8/tlb.S                |   30 ++++
>>>>  arch/arm/cpu/armv8/u-boot.lds           |   71 ++++++++++
>>>>  arch/arm/include/asm/arch-armv8/gpio.h  |   11 ++
>>>>  arch/arm/include/asm/arch-armv8/mmu.h   |  110 +++++++++++++++
>>
>> Also, this directory is mutually exclusive with arch-${soc}. I think
>> this should be moved up a level. gpio.h is probably fine as a default
>> version.
>>
> 
> I am also concerned about gpio.h. I am forced to create a gpio.h file
> even if I don't use it when I add an SoC for armv8.

Yes, gpio.h belongs elsewhere, and we should not be making symlinks for
arch-armv8; we should just have asm/armv8/ and asm/armv7/ and reference
them directly.
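
For illustration (hypothetical include lines, not part of this patch),
that would turn the symlink-based include into a direct one:

	/* today: resolved through the asm/arch -> arch-armv8 symlink */
	#include <asm/arch/mmu.h>

	/* proposed: reference the generic ARMv8 header directly */
	#include <asm/armv8/mmu.h>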

-- 
Tom
diff mbox

Patch

diff --git a/arch/arm/config.mk b/arch/arm/config.mk
index ce3903b..95c07ad 100644
--- a/arch/arm/config.mk
+++ b/arch/arm/config.mk
@@ -74,7 +74,9 @@  endif
 endif
 
 # needed for relocation
+ifndef CONFIG_ARM64
 LDFLAGS_u-boot += -pie
+endif
 
 #
 # FIXME: binutils versions < 2.22 have a bug in the assembler where
@@ -95,6 +97,8 @@  endif
 endif
 
 # check that only R_ARM_RELATIVE relocations are generated
+ifndef CONFIG_ARM64
 ifneq ($(CONFIG_SPL_BUILD),y)
 ALL-y	+= checkarmreloc
 endif
+endif
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
new file mode 100644
index 0000000..b216f27
--- /dev/null
+++ b/arch/arm/cpu/armv8/Makefile
@@ -0,0 +1,38 @@ 
+#
+# (C) Copyright 2000-2003
+# Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+
+include $(TOPDIR)/config.mk
+
+LIB	= $(obj)lib$(CPU).o
+
+START	:= start.o
+
+COBJS	+= cpu.o
+COBJS	+= timer.o
+COBJS	+= cache_v8.o
+
+SOBJS	+= exceptions.o
+SOBJS	+= cache.o
+SOBJS	+= tlb.o
+
+SRCS	:= $(START:.o=.S) $(SOBJS:.o=.S) $(COBJS:.o=.c)
+OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
+START	:= $(addprefix $(obj),$(START))
+
+all:	$(obj).depend $(START) $(LIB)
+
+$(LIB):	$(OBJS)
+	$(call cmd_link_o_target, $(OBJS))
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
new file mode 100644
index 0000000..419f169
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache.S
@@ -0,0 +1,130 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * void __asm_flush_dcache_level(level)
+ *
+ * clean and invalidate one cache level.
+ *
+ * x0: cache level
+ * x1~x9: clobbered
+ */
+ENTRY(__asm_flush_dcache_level)
+	lsl	x1, x0, #1
+	msr	csselr_el1, x1		/* select cache level */
+	isb				/* isb to sync the new csselr & ccsidr */
+	mrs	x6, ccsidr_el1		/* read the new ccsidr */
+	and	x2, x6, #7		/* x2 <- length of the cache lines */
+	add	x2, x2, #4		/* add 4 (line length offset) */
+	mov	x3, #0x3ff
+	and	x3, x3, x6, lsr #3	/* x3 <- maximum way number */
+	clz	w5, w3			/* bit position of way size */
+	mov	x4, #0x7fff
+	and	x4, x4, x6, lsr #13	/* x4 <- maximum set number */
+	/* x1 <- cache level << 1 */
+	/* x2 <- line length offset */
+	/* x3 <- number of cache ways */
+	/* x4 <- number of cache sets */
+	/* x5 <- bit position of way size */
+
+loop_set:
+	mov	x6, x3			/* create working copy of way size */
+loop_way:
+	lsl	x7, x6, x5
+	orr	x9, x0, x7		/* map way and level to cisw value */
+	lsl	x7, x4, x2
+	orr	x9, x9, x7		/* map set number to cisw value */
+	dc	cisw, x9		/* clean & invalidate by set/way */
+	subs	x6, x6, #1		/* decrement the way */
+	b.ge	loop_way
+	subs	x4, x4, #1		/* decrement the set */
+	b.ge	loop_set
+
+	ret
+ENDPROC(__asm_flush_dcache_level)
+
+/*
+ * void __asm_flush_dcache_all(void)
+ *
+ * clean and invalidate all data cache by SET/WAY.
+ */
+ENTRY(__asm_flush_dcache_all)
+	dsb	sy
+	mov	x15, lr
+	mrs	x10, clidr_el1		/* read clidr */
+	lsr	x11, x10, #24
+	and	x11, x11, #0x7		/* x11 <- loc */
+	cbz	x11, finished		/* if loc is 0, no need to clean */
+	mov	x0, #0			/* start flush at cache level 0 */
+	/* x0  <- cache level */
+	/* x10 <- clidr_el1 */
+	/* x11 <- loc */
+
+loop_level:
+	lsl	x1, x0, #1
+	add	x1, x1, x0		/* x0 <- 3x cache level */
+	lsr	x1, x10, x1
+	and	x1, x1, #7		/* x1 <- cache type */
+	cmp	x1, #2
+	b.lt	skip			/* skip if no cache or icache */
+	bl	__asm_flush_dcache_level
+skip:
+	add	x0, x0, #1		/* increment cache level */
+	cmp	x11, x0
+	b.gt	loop_level
+
+finished:
+	mov	x0, #0
+	msr	csselr_el1, x0		/* switch back to cache level 0 */
+	dsb	sy
+	isb
+	mov	lr, x15
+	ret
+ENDPROC(__asm_flush_dcache_all)
+
+/*
+ * void __asm_flush_dcache_range(start, end)
+ *
+ * clean & invalidate data cache in the range
+ *
+ * x0: start address
+ * x1: end address
+ */
+ENTRY(__asm_flush_dcache_range)
+	mrs	x3, ctr_el0		/* read CTR */
+	lsr	x3, x3, #16
+	and	x3, x3, #0xf		/* cache line size encoding */
+	mov	x2, #4			/* bytes per word */
+	lsl	x2, x2, x3		/* actual cache line size */
+
+	/* x2 <- minimal cache line size in cache system */
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:      dc	civac, x0		/* clean & invalidate D/unified line */
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__asm_flush_dcache_range)
+
+/*
+ * void __asm_invalidate_icache_all(void)
+ *
+ * invalidate all icache entries.
+ */
+ENTRY(__asm_invalidate_icache_all)
+	ic	ialluis
+	isb	sy
+	ret
+ENDPROC(__asm_invalidate_icache_all)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
new file mode 100644
index 0000000..34426fd
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -0,0 +1,218 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/system.h>
+#include <asm/arch/mmu.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+
+static void set_pgtable_section(u64 section, u64 memory_type)
+{
+	u64 *page_table = (u64 *)gd->arch.tlb_addr;
+	u64 value;
+
+	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
+	value |= PMD_ATTRINDX(memory_type);
+	page_table[section] = value;
+}
+
+/* to activate the MMU we need to set up virtual memory */
+static void mmu_setup(void)
+{
+	int i, j, el;
+	bd_t *bd = gd->bd;
+
+	/* Setup an identity-mapping for all spaces */
+	for (i = 0; i < (PAGE_SIZE >> 3); i++)
+		set_pgtable_section(i, MT_DEVICE_NGNRNE);
+
+	/* Setup an identity-mapping for all RAM space */
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		ulong start = bd->bi_dram[i].start;
+		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
+		for (j = start >> SECTION_SHIFT;
+		     j < end >> SECTION_SHIFT; j++) {
+			set_pgtable_section(j, MT_NORMAL);
+		}
+	}
+
+	/* load TTBR0 */
+	el = curent_el();
+	if (el == 1)
+		asm volatile("msr ttbr0_el1, %0"
+			     : : "r" (gd->arch.tlb_addr) : "memory");
+	else if (el == 2)
+		asm volatile("msr ttbr0_el2, %0"
+			     : : "r" (gd->arch.tlb_addr) : "memory");
+	else
+		panic("Not Supported Exception Level");
+
+	/* enable the mmu */
+	set_sctlr(get_sctlr() | CR_M);
+}
+
+/*
+ * Performs an invalidation of the entire data cache at all levels
+ */
+void invalidate_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+}
+
+/*
+ * Performs a clean & invalidation of the entire data cache at all levels
+ */
+void flush_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+}
+
+/*
+ * Invalidates range in all levels of D-cache/unified cache
+ */
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+}
+
+/*
+ * Flush range(clean & invalidate) from all levels of D-cache/unified cache
+ */
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+}
+
+void dcache_enable(void)
+{
+	/* The data cache is not active unless the mmu is enabled */
+	if (!(get_sctlr() & CR_M)) {
+		invalidate_dcache_all();
+		__asm_invalidate_tlb_all();
+		mmu_setup();
+	}
+
+	set_sctlr(get_sctlr() | CR_C);
+}
+
+void dcache_disable(void)
+{
+	uint32_t sctlr;
+
+	sctlr = get_sctlr();
+
+	/* if cache isn't enabled no need to disable */
+	if (!(sctlr & CR_C))
+		return;
+
+	set_sctlr(sctlr & ~(CR_C|CR_M));
+
+	flush_dcache_all();
+	__asm_invalidate_tlb_all();
+}
+
+int dcache_status(void)
+{
+	return (get_sctlr() & CR_C) != 0;
+}
+
+#else	/* CONFIG_SYS_DCACHE_OFF */
+
+void invalidate_dcache_all(void)
+{
+}
+
+void flush_dcache_all(void)
+{
+}
+
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void dcache_enable(void)
+{
+}
+
+void dcache_disable(void)
+{
+}
+
+int dcache_status(void)
+{
+	return 0;
+}
+
+#endif	/* CONFIG_SYS_DCACHE_OFF */
+
+#ifndef CONFIG_SYS_ICACHE_OFF
+
+void icache_enable(void)
+{
+	set_sctlr(get_sctlr() | CR_I);
+}
+
+void icache_disable(void)
+{
+	set_sctlr(get_sctlr() & ~CR_I);
+}
+
+int icache_status(void)
+{
+	return (get_sctlr() & CR_I) != 0;
+}
+
+void invalidate_icache_all(void)
+{
+	__asm_invalidate_icache_all();
+}
+
+#else	/* CONFIG_SYS_ICACHE_OFF */
+
+void icache_enable(void)
+{
+}
+
+void icache_disable(void)
+{
+}
+
+int icache_status(void)
+{
+	return 0;
+}
+
+void invalidate_icache_all(void)
+{
+}
+
+#endif	/* CONFIG_SYS_ICACHE_OFF */
+
+/*
+ * Enable dCache & iCache; whether the caches are actually enabled
+ * depends on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
+ */
+void enable_caches(void)
+{
+	icache_enable();
+	dcache_enable();
+}
+
+/*
+ * Flush range from all levels of d-cache/unified-cache
+ */
+void flush_cache(unsigned long start, unsigned long size)
+{
+	flush_dcache_range(start, start + size);
+}
diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
new file mode 100644
index 0000000..9f36d59
--- /dev/null
+++ b/arch/arm/cpu/armv8/config.mk
@@ -0,0 +1,16 @@ 
+#
+# (C) Copyright 2002
+# Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+PLATFORM_RELFLAGS += -fno-common -ffixed-x18
+
+# SEE README.arm-unaligned-accesses
+PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
+PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
+
+PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
+PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
+PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
+PLATFORM_CPPFLAGS += -fpic
diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
new file mode 100644
index 0000000..83e73ab
--- /dev/null
+++ b/arch/arm/cpu/armv8/cpu.c
@@ -0,0 +1,67 @@ 
+/*
+ * (C) Copyright 2008 Texas Instruments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroeger@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * CPU specific code
+ */
+
+#include <common.h>
+#include <command.h>
+#include <asm/system.h>
+#include <linux/compiler.h>
+
+void __weak cpu_cache_initialization(void){}
+
+int cleanup_before_linux(void)
+{
+	/*
+	 * this function is called just before we call linux
+	 * it prepares the processor for linux
+	 *
+	 * we turn off caches etc ...
+	 */
+#ifndef CONFIG_SPL_BUILD
+	disable_interrupts();
+#endif
+
+	/*
+	 * Turn off I-cache and invalidate it
+	 */
+	icache_disable();
+	invalidate_icache_all();
+
+	/*
+	 * turn off D-cache
+	 * dcache_disable() in turn flushes the d-cache and disables MMU
+	 */
+	dcache_disable();
+
+	/*
+	 * After D-cache is flushed and before it is disabled there may
+	 * be some new valid entries brought into the cache. We are sure
+	 * that these lines are not dirty and will not affect our execution.
+	 * (because unwinding the call-stack and setting a bit in SCTLR
+	 * is all we did during this. We have not pushed anything on to the
+	 * stack. Neither have we affected any static data)
+	 * So just invalidate the entire d-cache again to avoid coherency
+	 * problems for kernel
+	 */
+	invalidate_dcache_all();
+
+	/*
+	 * Some CPUs need more cache attention before starting the kernel.
+	 */
+	cpu_cache_initialization();
+
+	return 0;
+}
diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
new file mode 100644
index 0000000..b2f62c9
--- /dev/null
+++ b/arch/arm/cpu/armv8/exceptions.S
@@ -0,0 +1,115 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/ptrace.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * Enter Exception.
+ * This will save the processor state that is ELR/X0~X30
+ * to the stack frame.
+ */
+.macro	exception_entry
+	stp	x29, x30, [sp, #-16]!
+	stp	x27, x28, [sp, #-16]!
+	stp	x25, x26, [sp, #-16]!
+	stp	x23, x24, [sp, #-16]!
+	stp	x21, x22, [sp, #-16]!
+	stp	x19, x20, [sp, #-16]!
+	stp	x17, x18, [sp, #-16]!
+	stp	x15, x16, [sp, #-16]!
+	stp	x13, x14, [sp, #-16]!
+	stp	x11, x12, [sp, #-16]!
+	stp	x9, x10, [sp, #-16]!
+	stp	x7, x8, [sp, #-16]!
+	stp	x5, x6, [sp, #-16]!
+	stp	x3, x4, [sp, #-16]!
+	stp	x1, x2, [sp, #-16]!
+
+	/* Could be running at EL1 or EL2 */
+	mrs	x11, CurrentEL
+	cmp	x11, 0x4
+	b.eq	1f
+	cmp	x11, 0x8
+	b.eq	2f
+	b	3f
+1:	mrs	x1, esr_el1
+	mrs	x2, elr_el1
+	b	3f
+2:	mrs	x1, esr_el2
+	mrs	x2, elr_el2
+3:
+	stp	x2, x0, [sp, #-16]!
+	mov	x0, sp
+.endm
+
+/*
+ * Exception vectors.
+ */
+	.align	11
+	.globl	vectors
+vectors:
+	.align	7
+	b	_do_bad_sync	/* Current EL Synchronous Thread */
+
+	.align	7
+	b	_do_bad_irq	/* Current EL IRQ Thread */
+
+	.align	7
+	b	_do_bad_fiq	/* Current EL FIQ Thread */
+
+	.align	7
+	b	_do_bad_error	/* Current EL Error Thread */
+
+	.align	7
+	b	_do_sync	/* Current EL Synchronous Handler */
+
+	.align	7
+	b	_do_irq		/* Current EL IRQ Handler */
+
+	.align	7
+	b	_do_fiq		/* Current EL FIQ Handler */
+
+	.align	7
+	b	_do_error	/* Current EL Error Handler */
+
+
+_do_bad_sync:
+	exception_entry
+	bl	do_bad_sync
+
+_do_bad_irq:
+	exception_entry
+	bl	do_bad_irq
+
+_do_bad_fiq:
+	exception_entry
+	bl	do_bad_fiq
+
+_do_bad_error:
+	exception_entry
+	bl	do_bad_error
+
+_do_sync:
+	exception_entry
+	bl	do_sync
+
+_do_irq:
+	exception_entry
+	bl	do_irq
+
+_do_fiq:
+	exception_entry
+	bl	do_fiq
+
+_do_error:
+	exception_entry
+	bl	do_error
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
new file mode 100644
index 0000000..28c8fe3
--- /dev/null
+++ b/arch/arm/cpu/armv8/start.S
@@ -0,0 +1,234 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+#include <asm/arch/mmu.h>
+
+/*************************************************************************
+ *
+ * Startup Code (reset vector)
+ *
+ *************************************************************************/
+
+.globl	_start
+_start:
+	b	reset
+
+	.align 3
+
+.globl	_TEXT_BASE
+_TEXT_BASE:
+	.quad	CONFIG_SYS_TEXT_BASE
+
+/*
+ * These are defined in the linker script.
+ */
+.globl	_end_ofs
+_end_ofs:
+	.quad	_end - _start
+
+.globl	_bss_start_ofs
+_bss_start_ofs:
+	.quad	__bss_start - _start
+
+.globl	_bss_end_ofs
+_bss_end_ofs:
+	.quad	__bss_end - _start
+
+reset:
+	/*
+	 * Could be EL3/EL2/EL1
+	 */
+	mrs	x0, CurrentEL
+	cmp	x0, #0xc
+	b.ne	reset_nonsecure			/* Not EL3 */
+
+	bl	setup_el3			/* EL3 initialization */
+
+	/*
+	 * MMU Disabled, iCache Disabled, dCache Disabled
+	 */
+reset_nonsecure:
+
+#ifdef CONFIG_BOOTING_EL1
+	switch_el1_el2 x0, 1f, 2f, 3f
+1:	b	3f
+2:	bl	setup_el2			/* EL2 initialization */
+3:
+#endif
+
+	/* Initialize vBAR/CPACR_EL1/MDSCR_EL1 */
+	adr	x0, vectors
+	switch_el1_el2 x1, 1f, 2f, 3f
+1:	msr	vbar_el1, x0
+	mov	x0, #3 << 20
+	msr	cpacr_el1, x0			/* Enable FP/SIMD */
+	msr	mdscr_el1, xzr
+	b	3f
+2:	msr	vbar_el2, x0
+3:
+
+	/* Cache/BPB/TLB Invalidate */
+	bl	__asm_flush_dcache_all		/* dCache clean & invalidate */
+	bl	__asm_invalidate_icache_all	/* iCache invalidate */
+	bl	__asm_invalidate_tlb_all	/* invalidate TLBs */
+
+	/* Processor specific initialization */
+	bl	lowlevel_init
+
+	branch_if_slave	x0, slave_cpu
+
+	/*
+	 * Master CPU
+	 */
+master_cpu:
+	bl	_main
+
+	/*
+	 * Slave CPUs
+	 */
+slave_cpu:
+	wfe
+	ldr	x1, =SECONDARY_CPU_MAILBOX
+	ldr	x0, [x1]
+	cbz	x0, slave_cpu
+	br	x0			/* branch to the given address */
+
+/*-------------------------------------------------------------------------*/
+
+WEAK(setup_el3)
+	mov	x0, #0x531	/* Non-secure EL0/EL1 | HVC | 64bit EL2 */
+	msr	scr_el3, x0
+	msr	cptr_el3, xzr	/* Disable coprocessor traps to EL3 */
+
+	/* GIC initialization */
+	branch_if_slave	x0, 2f
+
+	/* Master initialize distributor */
+	ldr	x1, =GIC_DIST_BASE	/* GICD_CTLR */
+	mov	w0, #0x3		/* Enable Group0 & Group1 */
+	str	w0, [x1]
+	ldr	w0, [x1, #0x4]		/* GICD_TYPER */
+	and	w2, w0, #0x1f		/* ITLinesNumber */
+	add	w2, w2, #0x1		/* Number of GICD_IGROUPR registers */
+	add	x1, x1, #0x80		/* GICD_IGROUPR */
+	mov	w0, #~0			/* All Group1 */
+1:	str	w0, [x1], #0x4
+	sub	w2, w2, #0x1
+	cbnz	w2, 1b
+	b	3f
+
+	/* Slave initialize distributor */
+2:	ldr	x1, =GIC_DIST_BASE	/* GICD_CTLR */
+	mov	w0, #~0			/* All Group1 */
+	str	w0, [x1, #0x80]
+
+	/* Initialize cpu interface */
+3:	ldr	x1, =GIC_CPU_BASE	/* GICC_CTLR */
+	mov	w0, #0x3		/* Enable Group0 & Group1 */
+	str	w0, [x1]
+
+	mov	w0, #0x1 << 7		/* Non-Secure access to GICC_PMR */
+	str	w0, [x1, #0x4]		/* GICC_PMR */
+
+	/* Counter frequency initialization */
+	ldr	x0, =CONFIG_SYS_CNTFRQ
+	msr	cntfrq_el0, x0
+
+	/* SCTLR_EL2 initialization */
+	msr	sctlr_el2, xzr
+
+	/* Return to the EL2_SP2 mode from EL3 */
+	mov	x0, #0x3c9		/* EL2_SP2 | D | A | I | F */
+	msr	elr_el3, lr
+	msr	spsr_el3, x0
+	eret
+ENDPROC(setup_el3)
+
+WEAK(setup_el2)
+	/* Initialize Generic Timers */
+	mrs	x0, cnthctl_el2
+	orr	x0, x0, #0x3		/* Enable EL1 access to timers */
+	msr	cnthctl_el2, x0
+	msr	cntvoff_el2, xzr	/* Clear virtual offset */
+	mrs	x0, cntkctl_el1
+	orr	x0, x0, #0x3		/* EL0 access to counters */
+	msr	cntkctl_el1, x0
+
+	/* Initialize VPIDR/VMPIDR from MIDR/MPIDR */
+	mrs	x0, midr_el1
+	mrs	x1, mpidr_el1
+	msr	vpidr_el2, x0
+	msr	vmpidr_el2, x1
+
+	/* Disable coprocessor traps */
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
+	msr	hstr_el2, xzr		/* Disable CP15 traps to EL2 */
+
+	/* Initialize HCR_EL2 */
+	mov	x0, #(1 << 31)		/* 64bit EL1 */
+	orr	x0, x0, #(1 << 29)	/* Disable HVC */
+	msr	hcr_el2, x0
+
+	/* SCTLR_EL1 initialization */
+	mov	x0, #0x0800
+	movk	x0, #0x30d0, lsl #16
+	msr	sctlr_el1, x0
+
+	/* Return to the EL1_SP1 mode from EL2 */
+	mov	x0, #0x3c5		/* EL1_SP1 | D | A | I | F */
+	msr	elr_el2, lr
+	msr	spsr_el2, x0
+	eret
+ENDPROC(setup_el2)
+
+WEAK(lowlevel_init)
+	ret
+ENDPROC(lowlevel_init)
+
+/*-------------------------------------------------------------------------*/
+
+ENTRY(c_runtime_cpu_setup)
+	/* If I-cache is enabled invalidate it */
+#ifndef CONFIG_SYS_ICACHE_OFF
+	ic	iallu			/* I+BTB cache invalidate */
+	isb	sy
+#endif
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+	/*
+	 * Setup MAIR and TCR. Using 512GB address range.
+	 */
+	ldr	x0, =MEMORY_ATTRIBUTES
+	ldr	x1, =TCR_FLAGS
+
+	switch_el1_el2 x2, 1f, 2f, 3f
+1:	orr	x1, x1, TCR_EL1_IPS_40BIT
+	msr	mair_el1, x0
+	msr	tcr_el1, x1
+	b	3f
+2:	orr	x1, x1, TCR_EL2_IPS_40BIT
+	msr	mair_el2, x0
+	msr	tcr_el2, x1
+3:
+#endif
+
+	/* Relocate vBAR */
+	adr	x0, vectors
+	switch_el1_el2 x1, 1f, 2f, 3f
+1:	msr	vbar_el1, x0
+	b	3f
+2:	msr	vbar_el2, x0
+3:
+
+	ret
+ENDPROC(c_runtime_cpu_setup)
diff --git a/arch/arm/cpu/armv8/timer.c b/arch/arm/cpu/armv8/timer.c
new file mode 100644
index 0000000..9605e84
--- /dev/null
+++ b/arch/arm/cpu/armv8/timer.c
@@ -0,0 +1,80 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <div64.h>
+#include <linux/compiler.h>
+
+/*
+ * Generic Timer implementation of __udelay/get_timer/get_ticks/get_tbclk
+ * functions. If any other timer is used, another implementation should be
+ * placed in platform code.
+ */
+
+static inline u64 get_cntfrq(void)
+{
+	u64 cntfrq;
+	asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
+	return cntfrq;
+}
+
+static inline u64 tick_to_time(u64 tick)
+{
+	tick *= CONFIG_SYS_HZ;
+	do_div(tick, get_cntfrq());
+	return tick;
+}
+
+static inline u64 time_to_tick(u64 time)
+{
+	time *= get_cntfrq();
+	do_div(time, CONFIG_SYS_HZ);
+	return time;
+}
+
+/*
+ * Generic timer implementation of get_tbclk()
+ */
+ulong __weak get_tbclk(void)
+{
+	return CONFIG_SYS_HZ;
+}
+
+/*
+ * Generic timer implementation of get_timer()
+ */
+ulong __weak get_timer(ulong base)
+{
+	u64 cval;
+
+	isb();
+	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
+
+	return tick_to_time(cval) - base;
+}
+
+/*
+ * Generic timer implementation of get_ticks()
+ */
+unsigned long long __weak get_ticks(void)
+{
+	return get_timer(0);
+}
+
+/*
+ * Generic timer implementation of __udelay()
+ */
+void __weak __udelay(ulong usec)
+{
+	unsigned long ticks, limit;
+
+	limit = get_ticks() + usec/1000;
+
+	do {
+		ticks = get_ticks();
+	} while (ticks < limit);
+}
diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
new file mode 100644
index 0000000..6bb1e1f
--- /dev/null
+++ b/arch/arm/cpu/armv8/tlb.S
@@ -0,0 +1,30 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+
+/*
+ * void __asm_invalidate_tlb_all(void)
+ *
+ * invalidate all tlb entries.
+ */
+ENTRY(__asm_invalidate_tlb_all)
+	switch_el1_el2 x9, 1f, 2f, 3f
+1:	tlbi	vmalle1
+	dsb	sy
+	isb
+	b	3f
+2:	tlbi	alle2
+	dsb	sy
+	isb
+3:
+	ret
+ENDPROC(__asm_invalidate_tlb_all)
diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
new file mode 100644
index 0000000..328d477
--- /dev/null
+++ b/arch/arm/cpu/armv8/u-boot.lds
@@ -0,0 +1,71 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+ENTRY(_start)
+SECTIONS
+{
+	. = 0x00000000;
+
+	. = ALIGN(8);
+	.text :
+	{
+		*(.__image_copy_start)
+		CPUDIR/start.o (.text*)
+		*(.text*)
+	}
+
+	. = ALIGN(8);
+	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
+
+	. = ALIGN(8);
+	.data : {
+		*(.data*)
+	}
+
+	. = ALIGN(8);
+
+	. = .;
+
+	. = ALIGN(8);
+	.u_boot_list : {
+		KEEP(*(SORT(.u_boot_list*)));
+	}
+
+	. = ALIGN(8);
+	.reloc : {
+		__rel_got_start = .;
+		*(.got)
+		__rel_got_end = .;
+	}
+
+	.image_copy_end :
+	{
+		*(.__image_copy_end)
+	}
+
+	_end = .;
+
+	. = ALIGN(8);
+	.bss : {
+		__bss_start = .;
+		*(.bss*)
+		 . = ALIGN(8);
+		__bss_end = .;
+	}
+
+	/DISCARD/ : { *(.dynsym) }
+	/DISCARD/ : { *(.dynstr*) }
+	/DISCARD/ : { *(.dynamic*) }
+	/DISCARD/ : { *(.plt*) }
+	/DISCARD/ : { *(.interp*) }
+	/DISCARD/ : { *(.gnu*) }
+}
diff --git a/arch/arm/include/asm/arch-armv8/gpio.h b/arch/arm/include/asm/arch-armv8/gpio.h
new file mode 100644
index 0000000..afe7ece
--- /dev/null
+++ b/arch/arm/include/asm/arch-armv8/gpio.h
@@ -0,0 +1,11 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _ASM_ARMV8_GPIO_H_
+#define _ASM_ARMV8_GPIO_H_
+
+#endif	/* _ASM_ARMV8_GPIO_H_ */
diff --git a/arch/arm/include/asm/arch-armv8/mmu.h b/arch/arm/include/asm/arch-armv8/mmu.h
new file mode 100644
index 0000000..33b3246
--- /dev/null
+++ b/arch/arm/include/asm/arch-armv8/mmu.h
@@ -0,0 +1,110 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _ASM_ARMV8_MMU_H_
+#define _ASM_ARMV8_MMU_H_
+
+#ifdef __ASSEMBLY__
+#define _AC(X, Y)	X
+#else
+#define _AC(X, Y)	(X##Y)
+#endif
+
+#define UL(x)		_AC(x, UL)
+
+/***************************************************************/
+/*
+ * The following definitions are related to each other and should be
+ * calculated consistently.
+ */
+#define VA_BITS			(39)
+
+/* PAGE_SHIFT determines the page size */
+#undef  PAGE_SIZE
+#define PAGE_SHIFT		16
+#define PAGE_SIZE		(1 << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+/*
+ * section address mask and size definitions.
+ */
+#define SECTION_SHIFT		29
+#define SECTION_SIZE		(UL(1) << SECTION_SHIFT)
+#define SECTION_MASK		(~(SECTION_SIZE-1))
+/***************************************************************/
+
+/*
+ * Memory types
+ */
+#define MT_DEVICE_NGNRNE	0
+#define MT_DEVICE_NGNRE		1
+#define MT_DEVICE_GRE		2
+#define MT_NORMAL_NC		3
+#define MT_NORMAL		4
+
+#define MEMORY_ATTRIBUTES	((0x00 << MT_DEVICE_NGNRNE*8) |	\
+				(0x04 << MT_DEVICE_NGNRE*8) |	\
+				(0x0c << MT_DEVICE_GRE*8) |	\
+				(0x44 << MT_NORMAL_NC*8) |	\
+				(0xff << MT_NORMAL*8))
+
+/*
+ * Hardware page table definitions.
+ *
+ * Level 2 descriptor (PMD).
+ */
+#define PMD_TYPE_MASK		(3 << 0)
+#define PMD_TYPE_FAULT		(0 << 0)
+#define PMD_TYPE_TABLE		(3 << 0)
+#define PMD_TYPE_SECT		(1 << 0)
+
+/*
+ * Section
+ */
+#define PMD_SECT_S		(3 << 8)
+#define PMD_SECT_AF		(1 << 10)
+#define PMD_SECT_NG		(1 << 11)
+#define PMD_SECT_PXN		(UL(1) << 53)
+#define PMD_SECT_UXN		(UL(1) << 54)
+
+/*
+ * AttrIndx[2:0]
+ */
+#define PMD_ATTRINDX(t)		((t) << 2)
+#define PMD_ATTRINDX_MASK	(7 << 2)
+
+/*
+ * TCR flags.
+ */
+#define TCR_T0SZ(x)		((64 - (x)) << 0)
+#define TCR_IRGN_NC		(0 << 8)
+#define TCR_IRGN_WBWA		(1 << 8)
+#define TCR_IRGN_WT		(2 << 8)
+#define TCR_IRGN_WBNWA		(3 << 8)
+#define TCR_IRGN_MASK		(3 << 8)
+#define TCR_ORGN_NC		(0 << 10)
+#define TCR_ORGN_WBWA		(1 << 10)
+#define TCR_ORGN_WT		(2 << 10)
+#define TCR_ORGN_WBNWA		(3 << 10)
+#define TCR_ORGN_MASK		(3 << 10)
+#define TCR_SHARED_NON		(0 << 12)
+#define TCR_SHARED_OUTER	(1 << 12)
+#define TCR_SHARED_INNER	(2 << 12)
+#define TCR_TG0_4K		(0 << 14)
+#define TCR_TG0_64K		(1 << 14)
+#define TCR_TG0_16K		(2 << 14)
+#define TCR_EL1_IPS_40BIT	(2 << 32)
+#define TCR_EL2_IPS_40BIT	(2 << 16)
+
+/* PTWs cacheable, inner/outer WBWA and non-shareable */
+#define TCR_FLAGS		(TCR_TG0_64K |		\
+				TCR_SHARED_NON |	\
+				TCR_ORGN_WBWA |		\
+				TCR_IRGN_WBWA |		\
+				TCR_T0SZ(VA_BITS))
+
+#endif /* _ASM_ARMV8_MMU_H_ */
diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
index c3489f1..71a9966 100644
--- a/arch/arm/include/asm/byteorder.h
+++ b/arch/arm/include/asm/byteorder.h
@@ -23,10 +23,22 @@ 
 #  define __SWAB_64_THRU_32__
 #endif
 
+#ifdef	CONFIG_ARM64
+
+#ifdef __AARCH64EB__
+#include <linux/byteorder/big_endian.h>
+#else
+#include <linux/byteorder/little_endian.h>
+#endif
+
+#else	/* CONFIG_ARM64 */
+
 #ifdef __ARMEB__
 #include <linux/byteorder/big_endian.h>
 #else
 #include <linux/byteorder/little_endian.h>
 #endif
 
+#endif	/* CONFIG_ARM64 */
+
 #endif
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index 6d60a4a..ddebbc8 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -11,6 +11,8 @@ 
 
 #include <asm/system.h>
 
+#ifndef CONFIG_ARM64
+
 /*
  * Invalidate L2 Cache using co-proc instruction
  */
@@ -28,6 +30,9 @@  void l2_cache_disable(void);
 void set_section_dcache(int section, enum dcache_option option);
 
 void dram_bank_mmu_setup(int bank);
+
+#endif
+
 /*
  * The current upper bound for ARM L1 data cache line sizes is 64 bytes.  We
  * use that value for aligning DMA buffers unless the board config has specified
diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
index 99b703e..0ee131d 100644
--- a/arch/arm/include/asm/config.h
+++ b/arch/arm/include/asm/config.h
@@ -9,4 +9,14 @@ 
 
 #define CONFIG_LMB
 #define CONFIG_SYS_BOOT_RAMDISK_HIGH
+
+#ifdef CONFIG_ARM64
+/*
+ * Currently, the GOT is used to relocate u-boot, so the
+ * CONFIG_NEEDS_MANUAL_RELOC option is needed.
+ */
+#define CONFIG_NEEDS_MANUAL_RELOC
+#define CONFIG_PHYS_64BIT
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
index 79a9597..30a338e 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -47,6 +47,10 @@  struct arch_global_data {
 
 #include <asm-generic/global_data.h>
 
-#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r8")
+#ifdef CONFIG_ARM64
+#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("x18")
+#else
+#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("r8")
+#endif
 
 #endif /* __ASM_GBL_DATA_H */
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1fbc531..6a1f05a 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -75,42 +75,45 @@  static inline phys_addr_t virt_to_phys(void * vaddr)
 #define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
 #define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
 
-extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen)
+extern inline void __raw_writesb(unsigned long addr, const void *data,
+				 int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
 		__arch_putb(*buf++, addr);
 }
 
-extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen)
+extern inline void __raw_writesw(unsigned long addr, const void *data,
+				 int wordlen)
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
 		__arch_putw(*buf++, addr);
 }
 
-extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen)
+extern inline void __raw_writesl(unsigned long addr, const void *data,
+				 int longlen)
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
 		__arch_putl(*buf++, addr);
 }
 
-extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
+extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
 		*buf++ = __arch_getb(addr);
 }
 
-extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
+extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
 		*buf++ = __arch_getw(addr);
 }
 
-extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
+extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index ff13f36..db8869e 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -54,5 +54,44 @@ 
 	bcs	1b
 .endm
 
+#ifdef CONFIG_ARM64
+/*
+ * Register aliases.
+ */
+lr	.req	x30
+
+/*
+ * Branch according to exception level
+ */
+.macro	switch_el1_el2, xreg, el1_label, el2_label, fail_label
+	mrs	\xreg, CurrentEL
+	cmp	\xreg, 0x4
+	b.eq	\el1_label
+	cmp	\xreg, 0x8
+	b.eq	\el2_label
+	b	\fail_label
+.endm
+
+/*
+ * Branch if current processor is a slave,
+ * choose processor with all zero affinity value as the master.
+ */
+.macro	branch_if_slave, xreg, slave_label
+	mrs	\xreg, mpidr_el1
+	tst	\xreg, #0xff		/* Test Affinity 0 */
+	b.ne	\slave_label
+	lsr	\xreg, \xreg, #8
+	tst	\xreg, #0xff		/* Test Affinity 1 */
+	b.ne	\slave_label
+	lsr	\xreg, \xreg, #8
+	tst	\xreg, #0xff		/* Test Affinity 2 */
+	b.ne	\slave_label
+	lsr	\xreg, \xreg, #16
+	tst	\xreg, #0xff		/* Test Affinity 3 */
+	b.ne	\slave_label
+.endm
+
+#endif /* CONFIG_ARM64 */
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_ARM_MACRO_H__ */
diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
index c412486..9ba9add 100644
--- a/arch/arm/include/asm/posix_types.h
+++ b/arch/arm/include/asm/posix_types.h
@@ -13,6 +13,8 @@ 
 #ifndef __ARCH_ARM_POSIX_TYPES_H
 #define __ARCH_ARM_POSIX_TYPES_H
 
+#include <config.h>
+
 /*
  * This file is generally used by user-level software, so you need to
  * be a little careful about namespace pollution etc.  Also, we cannot
@@ -28,9 +30,17 @@  typedef int			__kernel_pid_t;
 typedef unsigned short		__kernel_ipc_pid_t;
 typedef unsigned short		__kernel_uid_t;
 typedef unsigned short		__kernel_gid_t;
+
+#ifdef	CONFIG_ARM64
+typedef unsigned long		__kernel_size_t;
+typedef long			__kernel_ssize_t;
+typedef long			__kernel_ptrdiff_t;
+#else	/* CONFIG_ARM64 */
 typedef unsigned int		__kernel_size_t;
 typedef int			__kernel_ssize_t;
 typedef int			__kernel_ptrdiff_t;
+#endif	/* CONFIG_ARM64 */
+
 typedef long			__kernel_time_t;
 typedef long			__kernel_suseconds_t;
 typedef long			__kernel_clock_t;
diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h
index 79cc644..fd280cb 100644
--- a/arch/arm/include/asm/proc-armv/ptrace.h
+++ b/arch/arm/include/asm/proc-armv/ptrace.h
@@ -12,6 +12,25 @@ 
 
 #include <linux/config.h>
 
+#ifdef CONFIG_ARM64
+
+#define PCMASK		0
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This struct defines the way the registers are stored
+ * on the stack during an exception.
+ */
+struct pt_regs {
+	unsigned long elr;
+	unsigned long regs[31];
+};
+
+#endif	/* __ASSEMBLY__ */
+
+#else	/* CONFIG_ARM64 */
+
 #define USR26_MODE	0x00
 #define FIQ26_MODE	0x01
 #define IRQ26_MODE	0x02
@@ -106,4 +125,6 @@  static inline int valid_user_regs(struct pt_regs *regs)
 
 #endif	/* __ASSEMBLY__ */
 
+#endif	/* CONFIG_ARM64 */
+
 #endif
diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h
index b4cfa68..19b2b44 100644
--- a/arch/arm/include/asm/proc-armv/system.h
+++ b/arch/arm/include/asm/proc-armv/system.h
@@ -15,6 +15,60 @@ 
 /*
  * Save the current interrupt enable state & disable IRQs
  */
+#ifdef CONFIG_ARM64
+
+/*
+ * Save the current interrupt enable state
+ * and disable IRQs/FIQs
+ */
+#define local_irq_save(flags)					\
+	({							\
+	asm volatile(						\
+	"mrs	%0, daif\n"				\
+	"msr	daifset, #3"					\
+	: "=r" (flags)						\
+	:							\
+	: "memory");						\
+	})
+
+/*
+ * restore saved IRQ & FIQ state
+ */
+#define local_irq_restore(flags)				\
+	({							\
+	asm volatile(						\
+	"msr	daif, %0"					\
+	:							\
+	: "r" (flags)						\
+	: "memory");						\
+	})
+
+/*
+ * Enable IRQs/FIQs
+ */
+#define local_irq_enable()					\
+	({							\
+	asm volatile(						\
+	"msr	daifclr, #3"					\
+	:							\
+	:							\
+	: "memory");						\
+	})
+
+/*
+ * Disable IRQs/FIQs
+ */
+#define local_irq_disable()					\
+	({							\
+	asm volatile(						\
+	"msr	daifset, #3"					\
+	:							\
+	:							\
+	: "memory");						\
+	})
+
+#else	/* CONFIG_ARM64 */
+
 #define local_irq_save(x)					\
 	({							\
 		unsigned long temp;				\
@@ -109,7 +163,10 @@ 
 	: "r" (x)						\
 	: "memory")
 
-#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
+#endif	/* CONFIG_ARM64 */
+
+#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \
+	defined(CONFIG_ARM64)
 /*
  * On the StrongARM, "swp" is terminally broken since it bypasses the
  * cache totally.  This means that the cache becomes inconsistent, and,
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 760345f..e79f790 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -1,6 +1,79 @@ 
 #ifndef __ASM_ARM_SYSTEM_H
 #define __ASM_ARM_SYSTEM_H
 
+#ifdef CONFIG_ARM64
+
+/*
+ * SCTLR_EL2 bits definitions
+ */
+#define CR_M		(1 << 0)	/* MMU enable			*/
+#define CR_A		(1 << 1)	/* Alignment abort enable	*/
+#define CR_C		(1 << 2)	/* Dcache enable		*/
+#define CR_SA		(1 << 3)	/* Stack Alignment Check Enable	*/
+#define CR_I		(1 << 12)	/* Icache enable		*/
+#define CR_WXN		(1 << 19)	/* Write permission implies XN	*/
+#define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
+
+#define PGTABLE_SIZE	(0x10000)
+
+#ifndef __ASSEMBLY__
+
+#define isb()				\
+	({asm volatile(			\
+	"isb" : : : "memory");		\
+	})
+
+#define wfi()				\
+	({asm volatile(			\
+	"wfi" : : : "memory");		\
+	})
+
+static inline unsigned int curent_el(void)
+{
+	unsigned int el;
+	asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
+	return el >> 2;
+}
+
+static inline unsigned int get_sctlr(void)
+{
+	unsigned int el, val;
+
+	el = curent_el();
+	if (el == 1)
+		asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
+	else if (el == 2)
+		asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
+	else
+		panic("Not Supported Exception Level");
+
+	return val;
+}
+
+static inline void set_sctlr(unsigned int val)
+{
+	unsigned int el;
+
+	el = curent_el();
+	if (el == 1)
+		asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
+	else if (el == 2)
+		asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
+	else
+		panic("Not Supported Exception Level");
+
+	asm volatile("isb");
+}
+
+void __asm_flush_dcache_all(void);
+void __asm_flush_dcache_range(u64 start, u64 end);
+void __asm_invalidate_tlb_all(void);
+void __asm_invalidate_icache_all(void);
+
+#endif	/* __ASSEMBLY__ */
+
+#else /* CONFIG_ARM64 */
+
 #ifdef __KERNEL__
 
 #define CPU_ARCH_UNKNOWN	0
@@ -45,6 +118,8 @@ 
 #define CR_AFE	(1 << 29)	/* Access flag enable			*/
 #define CR_TE	(1 << 30)	/* Thumb exception enable		*/
 
+#define PGTABLE_SIZE		(4096 * 4)
+
 /*
  * This is used to ensure the compiler did actually allocate the register we
  * asked it for some inline assembly sequences.  Apparently we can't trust
@@ -132,4 +207,6 @@  void mmu_page_table_flush(unsigned long start, unsigned long stop);
 
 #endif /* __KERNEL__ */
 
+#endif /* CONFIG_ARM64 */
+
 #endif
diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
index 71dc049..2326420 100644
--- a/arch/arm/include/asm/types.h
+++ b/arch/arm/include/asm/types.h
@@ -39,7 +39,11 @@  typedef unsigned int u32;
 typedef signed long long s64;
 typedef unsigned long long u64;
 
+#ifdef	CONFIG_ARM64
+#define BITS_PER_LONG 64
+#else	/* CONFIG_ARM64 */
 #define BITS_PER_LONG 32
+#endif	/* CONFIG_ARM64 */
 
 /* Dma addresses are 32-bits wide.  */
 
diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
index 2b5fce8..cb81232 100644
--- a/arch/arm/include/asm/u-boot.h
+++ b/arch/arm/include/asm/u-boot.h
@@ -44,6 +44,10 @@  typedef struct bd_info {
 #endif /* !CONFIG_SYS_GENERIC_BOARD */
 
 /* For image.h:image_check_target_arch() */
+#ifndef CONFIG_ARM64
 #define IH_ARCH_DEFAULT IH_ARCH_ARM
+#else
+#define IH_ARCH_DEFAULT IH_ARCH_ARM64
+#endif
 
 #endif	/* _U_BOOT_H_ */
diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
index 44593a8..0a228fb 100644
--- a/arch/arm/include/asm/unaligned.h
+++ b/arch/arm/include/asm/unaligned.h
@@ -8,7 +8,7 @@ 
 /*
  * Select endianness
  */
-#ifndef __ARMEB__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 #define get_unaligned	__get_unaligned_le
 #define put_unaligned	__put_unaligned_le
 #else
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 4e78723..03c31c7 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -10,6 +10,9 @@  include $(TOPDIR)/config.mk
 LIB	= $(obj)lib$(ARCH).o
 LIBGCC	= $(obj)libgcc.o
 
+ifdef CONFIG_ARM64
+SOBJS-y += crt0_64.o
+else
 GLSOBJS	+= _ashldi3.o
 GLSOBJS	+= _ashrdi3.o
 GLSOBJS	+= _divsi3.o
@@ -21,9 +24,14 @@  GLSOBJS	+= _umodsi3.o
 GLCOBJS	+= div0.o
 
 SOBJS-y += crt0.o
+endif
 
 ifndef CONFIG_SPL_BUILD
+ifdef CONFIG_ARM64
+SOBJS-y += relocate_64.o
+else
 SOBJS-y += relocate.o
+endif
 ifndef CONFIG_SYS_GENERIC_BOARD
 COBJS-y	+= board.o
 endif
@@ -38,11 +46,17 @@  else
 COBJS-$(CONFIG_SPL_FRAMEWORK) += spl.o
 endif
 
+ifdef CONFIG_ARM64
+COBJS-y	+= interrupts_64.o
+else
 COBJS-y	+= interrupts.o
+endif
 COBJS-y	+= reset.o
 
 COBJS-y	+= cache.o
+ifndef CONFIG_ARM64
 COBJS-y	+= cache-cp15.o
+endif
 
 SRCS	:= $(GLSOBJS:.o=.S) $(GLCOBJS:.o=.c) \
 	   $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
index 34f50b0..d6d0833 100644
--- a/arch/arm/lib/board.c
+++ b/arch/arm/lib/board.c
@@ -344,7 +344,7 @@  void board_init_f(ulong bootflag)
 
 #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
 	/* reserve TLB table */
-	gd->arch.tlb_size = 4096 * 4;
+	gd->arch.tlb_size = PGTABLE_SIZE;
 	addr -= gd->arch.tlb_size;
 
 	/* round down to next 64 kB limit */
@@ -419,6 +419,7 @@  void board_init_f(ulong bootflag)
 	}
 #endif
 
+#ifndef CONFIG_ARM64
 	/* setup stackpointer for exeptions */
 	gd->irq_sp = addr_sp;
 #ifdef CONFIG_USE_IRQ
@@ -431,6 +432,10 @@  void board_init_f(ulong bootflag)
 
 	/* 8-byte alignment for ABI compliance */
 	addr_sp &= ~0x07;
+#else	/* CONFIG_ARM64 */
+	/* 16-byte alignment for ABI compliance */
+	addr_sp &= ~0x0f;
+#endif	/* CONFIG_ARM64 */
 #else
 	addr_sp += 128;	/* leave 32 words for abort-stack   */
 	gd->irq_sp = addr_sp;
@@ -513,7 +518,15 @@  void board_init_r(gd_t *id, ulong dest_addr)
 	ulong flash_size;
 #endif
 
+	/*
+	 * Relocate the serial_device routines first so that
+	 * printf accesses the correct puts function. This is critical
+	 * when CONFIG_NEEDS_MANUAL_RELOC is needed.
+	 */
+	serial_initialize();
+
 	gd->flags |= GD_FLG_RELOC;	/* tell others: relocation done */
+
 	bootstage_mark_name(BOOTSTAGE_ID_START_UBOOT_R, "board_init_r");
 
 	monitor_flash_len = _end_ofs;
@@ -523,6 +536,15 @@  void board_init_r(gd_t *id, ulong dest_addr)
 
 	debug("monitor flash len: %08lX\n", monitor_flash_len);
 	board_init();	/* Setup chipselects */
+
+#ifdef CONFIG_NEEDS_MANUAL_RELOC
+	/*
+	 * We have to relocate the command table manually
+	 */
+	fixup_cmdtable(ll_entry_start(cmd_tbl_t, cmd),
+			ll_entry_count(cmd_tbl_t, cmd));
+#endif /* CONFIG_NEEDS_MANUAL_RELOC */
+
 	/*
 	 * TODO: printing of the clock inforamtion of the board is now
 	 * implemented as part of bdinfo command. Currently only support for
@@ -532,7 +554,6 @@  void board_init_r(gd_t *id, ulong dest_addr)
 #ifdef CONFIG_CLOCKS
 	set_cpu_clk_info(); /* Setup clock information */
 #endif
-	serial_initialize();
 
 	debug("Now running in RAM - U-Boot at: %08lx\n", dest_addr);
 
diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
index eefb456..26e85f0 100644
--- a/arch/arm/lib/bootm.c
+++ b/arch/arm/lib/bootm.c
@@ -222,6 +222,21 @@  static void boot_prep_linux(bootm_headers_t *images)
 /* Subcommand: GO */
 static void boot_jump_linux(bootm_headers_t *images, int flag)
 {
+#ifdef CONFIG_ARM64
+	void (*kernel_entry)(void *fdt_addr);
+	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+	kernel_entry = (void (*)(void *fdt_addr))images->ep;
+
+	debug("## Transferring control to Linux (at address %lx)...\n",
+		(ulong) kernel_entry);
+	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+	announce_and_cleanup(fake);
+
+	if (!fake)
+		kernel_entry(images->ft_addr);
+#else
 	unsigned long machid = gd->bd->bi_arch_number;
 	char *s;
 	void (*kernel_entry)(int zero, int arch, uint params);
@@ -248,6 +263,7 @@  static void boot_jump_linux(bootm_headers_t *images, int flag)
 
 	if (!fake)
 		kernel_entry(0, machid, r2);
+#endif
 }
 
 /* Main Entry point for arm bootm implementation
diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
new file mode 100644
index 0000000..ddd46eb
--- /dev/null
+++ b/arch/arm/lib/crt0_64.S
@@ -0,0 +1,116 @@ 
+/*
+ * crt0 - C-runtime startup Code for AArch64 U-Boot
+ *
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * (C) Copyright 2012
+ * Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <config.h>
+#include <asm-offsets.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * This file handles the target-independent stages of the U-Boot
+ * start-up where a C runtime environment is needed. Its entry point
+ * is _main and is branched into from the target's start.S file.
+ *
+ * _main execution sequence is:
+ *
+ * 1. Set up initial environment for calling board_init_f().
+ *    This environment only provides a stack and a place to store
+ *    the GD ('global data') structure, both located in some readily
+ *    available RAM (SRAM, locked cache...). In this context, VARIABLE
+ *    global data, initialized or not (BSS), are UNAVAILABLE; only
+ *    CONSTANT initialized data are available.
+ *
+ * 2. Call board_init_f(). This function prepares the hardware for
+ *    execution from system RAM (DRAM, DDR...). As system RAM may not
+ *    be available yet, board_init_f() must use the current GD to
+ *    store any data which must be passed on to later stages. These
+ *    data include the relocation destination, the future stack, and
+ *    the future GD location.
+ *
+ * (the following applies only to non-SPL builds)
+ *
+ * 3. Set up intermediate environment where the stack and GD are the
+ *    ones allocated by board_init_f() in system RAM, but BSS and
+ *    initialized non-const data are still not available.
+ *
+ * 4. Call relocate_code(). This function relocates U-Boot from its
+ *    current location into the relocation destination computed by
+ *    board_init_f().
+ *
+ * 5. Set up final environment for calling board_init_r(). This
+ *    environment has BSS (initialized to 0), initialized non-const
+ *    data (initialized to their intended value), and stack in system
+ *    RAM. GD has retained values set by board_init_f(). Some CPUs
+ *    have some work left to do at this point regarding memory, so
+ *    call c_runtime_cpu_setup.
+ *
+ * 6. Branch to board_init_r().
+ */
+
+ENTRY(_main)
+
+/*
+ * Set up initial C runtime environment and call board_init_f(0).
+ */
+	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
+	sub	x0, x0, #GD_SIZE	/* allocate one GD above SP */
+	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
+	mov	x18, sp			/* GD is above SP */
+	mov	x0, #0
+	bl	board_init_f
+
+/*
+ * Set up intermediate environment (new sp and gd) and call
+ * relocate_code(addr_moni). Trick here is that we'll return
+ * 'here' but relocated.
+ */
+	ldr	x0, [x18, #GD_START_ADDR_SP]	/* x0 <- gd->start_addr_sp */
+	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
+	ldr	x18, [x18, #GD_BD]		/* x18 <- gd->bd */
+	sub	x18, x18, #GD_SIZE		/* new GD is below bd */
+
+	adr	lr, relocation_return
+	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
+	add	lr, lr, x9	/* new return address after relocation */
+	ldr	x0, [x18, #GD_RELOCADDR]	/* x0 <- gd->relocaddr */
+	b	relocate_code
+
+relocation_return:
+
+/*
+ * Set up final (full) environment
+ */
+	bl	c_runtime_cpu_setup		/* still call old routine */
+
+/*
+ * Clear BSS section
+ */
+	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
+	ldr	x0, =__bss_start
+	add	x0, x0, x9			/* x0 <- __bss_start in RAM */
+	ldr	x1, =__bss_end
+	add	x1, x1, x9			/* x1 <- __bss_end in RAM */
+	mov	x2, #0
+clear_loop:
+	str	x2, [x0]
+	add	x0, x0, #8
+	cmp	x0, x1
+	b.lo	clear_loop
+
+	/* call board_init_r(gd_t *id, ulong dest_addr) */
+	mov	x0, x18				/* gd_t */
+	ldr	x1, [x18, #GD_RELOCADDR]	/* dest_addr */
+	b	board_init_r			/* PC relative jump */
+
+	/* NOTREACHED - board_init_r() does not return */
+
+ENDPROC(_main)
diff --git a/arch/arm/lib/interrupts_64.c b/arch/arm/lib/interrupts_64.c
new file mode 100644
index 0000000..b476722
--- /dev/null
+++ b/arch/arm/lib/interrupts_64.c
@@ -0,0 +1,120 @@ 
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <linux/compiler.h>
+
+int interrupt_init(void)
+{
+	return 0;
+}
+
+void enable_interrupts(void)
+{
+	return;
+}
+
+int disable_interrupts(void)
+{
+	return 0;
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	int i;
+
+	printf("ELR:     %lx\n", regs->elr);
+	printf("LR:      %lx\n", regs->regs[30]);
+	for (i = 0; i < 29; i += 2)
+		printf("x%-2d: %016lx x%-2d: %016lx\n",
+		       i, regs->regs[i], i+1, regs->regs[i+1]);
+	printf("\n");
+}
+
+/*
+ * do_bad_sync handles the impossible case in the Synchronous Abort vector.
+ */
+void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_irq handles the impossible case in the Irq vector.
+ */
+void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_fiq handles the impossible case in the Fiq vector.
+ */
+void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_error handles the impossible case in the Error vector.
+ */
+void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_sync handles the Synchronous Abort exception.
+ */
+void do_sync(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_irq handles the Irq exception.
+ */
+void do_irq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Irq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_fiq handles the Fiq exception.
+ */
+void do_fiq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Fiq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_error handles the Error exception.
+ * Errors are more likely to be processor specific, so the handler
+ * is defined with the weak attribute and can be overridden by
+ * processor-specific code.
+ */
+void __weak do_error(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Error\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
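Because do_error() above is defined __weak, a processor port can override it at link time. Purely as an illustration and not part of this patch (the extra ESR decoding and the idea of placing it in an SoC-specific file are assumptions), such an override could look like:

#include <common.h>

void show_regs(struct pt_regs *regs);	/* provided by interrupts_64.c */

void do_error(struct pt_regs *pt_regs, unsigned int esr)
{
	/* EC is ESR_ELx bits [31:26] */
	printf("\"Error\" handler, esr 0x%08x, EC 0x%02x\n",
	       esr, (esr >> 26) & 0x3f);
	show_regs(pt_regs);
	panic("Resetting CPU ...\n");
}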
diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S
new file mode 100644
index 0000000..29c3239
--- /dev/null
+++ b/arch/arm/lib/relocate_64.S
@@ -0,0 +1,57 @@ 
+/*
+ * relocate - common relocation function for AArch64 U-Boot
+ *
+ * (C) Copyright 2013
+ * Albert ARIBAUD <albert.u.boot@aribaud.net>
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <linux/linkage.h>
+
+/*
+ * void relocate_code (addr_moni)
+ *
+ * This function relocates the monitor code.
+ *
+ * NOTE:
+ * Relocation is done through the GOT, so CONFIG_NEEDS_MANUAL_RELOC must
+ * be defined.
+ */
+ENTRY(relocate_code)
+	/*
+	 * Copy u-boot from flash to RAM
+	 */
+	ldr	x1, =__image_copy_start	/* x1 <- copy source */
+	cmp	x1, x0
+	b.eq	relocate_done		/* skip relocation */
+	mov	x2, x0			/* x2 <- copy destination */
+	ldr	x3, =__image_copy_end	/* x3 <- source end address */
+
+copy_loop:
+	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
+	stp	x10, x11, [x2], #16	/* copy to   target address [x2] */
+	cmp	x1, x3			/* until source end address [x3] */
+	b.lo	copy_loop
+
+	/*
+	 * Fix .reloc relocations
+	 */
+	ldr	x9, [x18, #GD_RELOC_OFF]/* x9 <- relocation offset */
+	ldr	x1, =__rel_got_start	/* x1 <- rel got start ofs */
+	add	x1, x1, x9		/* x1 <- rel got start in RAM */
+	ldr	x2, =__rel_got_end	/* x2 <- rel got end ofs */
+	add	x2, x2, x9		/* x2 <- rel got end in RAM */
+fixloop:
+	ldr	x10, [x1]
+	add	x10, x10, x9		/* x10 <- address to be fixed up */
+	str	x10, [x1]
+	add	x1, x1, #8		/* each got entry is 8 bytes */
+	cmp	x1, x2
+	b.lo	fixloop
+
+relocate_done:
+	ret
+ENDPROC(relocate_code)
diff --git a/common/image.c b/common/image.c
index b0ae58f..4145354 100644
--- a/common/image.c
+++ b/common/image.c
@@ -81,6 +81,7 @@  static const table_entry_t uimage_arch[] = {
 	{	IH_ARCH_NDS32,		"nds32",	"NDS32",	},
 	{	IH_ARCH_OPENRISC,	"or1k",		"OpenRISC 1000",},
 	{	IH_ARCH_SANDBOX,	"sandbox",	"Sandbox",	},
+	{	IH_ARCH_ARM64,		"arm64",	"AArch64",	},
 	{	-1,			"",		"",		},
 };
 
diff --git a/doc/README.arm64 b/doc/README.arm64
new file mode 100644
index 0000000..746ce6a
--- /dev/null
+++ b/doc/README.arm64
@@ -0,0 +1,33 @@ 
+U-Boot for arm64
+
+Summary
+=======
+No arm64 hardware platform is available yet. U-Boot runs on the ARM
+Foundation Model and Fast Model for ARMv8 simulators.
+
+Notes
+=====
+
+1. Currently, U-Boot can run at either EL1 or EL2.
+
+2. Currently, U-Boot for arm64 is compiled with AArch64 gcc. AArch64 gcc
+   uses the RELA relocation format, which cannot be processed at run
+   time, so the GOT is used to relocate U-Boot and
+   CONFIG_NEEDS_MANUAL_RELOC is required.
+
+3. The fdt should be placed on a 2-megabyte boundary and within the first
+   512 megabytes from the start of the kernel image, so fdt_high must be
+   set accordingly (see the illustrative fragment below).
+   Please refer to linux/Documentation/arm64/booting.txt for details.
+
+4. Generic board is supported.
+
+5. CONFIG_ARM64, rather than CONFIG_ARMV8, is used to distinguish
+   aarch64-specific from aarch32-specific code.
+
+Contributors
+============
+   Tom Rini       <trini@ti.com>
+   Scott Wood     <scottwood@freescale.com>
+   Simon Glass    <sjg@chromium.org>
+   Sharma Bhupesh <bhupesh.sharma@freescale.com>
+   Rob Herring    <robherring2@gmail.com>
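To illustrate note 3 of the README above, and only as a sketch (the value and its placement in a board config header are assumptions, not part of this patch), a board could stop bootm from relocating the device tree and instead rely on loading it at a suitable address in the first place:

/* hypothetical board configuration fragment */
#define CONFIG_EXTRA_ENV_SETTINGS \
	"fdt_high=0xffffffffffffffff\0"

Setting fdt_high to all ones keeps the fdt wherever it was loaded, so the load address itself has to satisfy the 2 MB alignment and 512 MB window described in booting.txt.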
diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
index 8fb1765..fc5d7ef 100644
--- a/examples/standalone/stubs.c
+++ b/examples/standalone/stubs.c
@@ -39,6 +39,20 @@  gd_t *global_data;
 "	bctr\n"				\
 	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
 #elif defined(CONFIG_ARM)
+#ifdef CONFIG_ARM64
+/*
+ * x18 holds the pointer to the global_data, x9 is a call-clobbered
+ * register
+ */
+#define EXPORT_FUNC(x) \
+	asm volatile (			\
+"	.globl " #x "\n"		\
+#x ":\n"				\
+"	ldr	x9, [x18, %0]\n"		\
+"	ldr	x9, [x9, %1]\n"		\
+"	br	x9\n"		\
+	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9");
+#else
 /*
  * r8 holds the pointer to the global_data, ip is a call-clobbered
  * register
@@ -50,6 +64,7 @@  gd_t *global_data;
 "	ldr	ip, [r8, %0]\n"		\
 "	ldr	pc, [ip, %1]\n"		\
 	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
+#endif
 #elif defined(CONFIG_MIPS)
 /*
  * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
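For context on the arm64 EXPORT_FUNC stub added above: a standalone application reaches exported functions through the jump table, with the gd pointer held in x18. A minimal sketch, not part of this patch and with a hypothetical application name:

#include <common.h>
#include <exports.h>

int hello_arm64(int argc, char * const argv[])
{
	app_startup(argv);	/* standard standalone init */
	/* printf here goes through the EXPORT_FUNC stub, i.e. via gd->jt */
	printf("hello from an AArch64 standalone application\n");
	return 0;
}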
diff --git a/include/image.h b/include/image.h
index ee6eb8d..7de2bb2 100644
--- a/include/image.h
+++ b/include/image.h
@@ -156,6 +156,7 @@  struct lmb;
 #define IH_ARCH_SANDBOX		19	/* Sandbox architecture (test only) */
 #define IH_ARCH_NDS32	        20	/* ANDES Technology - NDS32  */
 #define IH_ARCH_OPENRISC        21	/* OpenRISC 1000  */
+#define IH_ARCH_ARM64		22	/* ARM64	*/
 
 /*
  * Image Types