Patchwork [U-Boot,v7,1/5] core support of arm64

login
register
mail settings
Submitter fenghua@phytium.com.cn
Date Sept. 10, 2013, 8:12 a.m.
Message ID <1378800731-16462-2-git-send-email-fenghua@phytium.com.cn>
Download mbox | patch
Permalink /patch/273781/
State Superseded
Delegated to: Albert ARIBAUD
Headers show

Comments

fenghua@phytium.com.cn - Sept. 10, 2013, 8:12 a.m.
From: David Feng <fenghua@phytium.com.cn>

Signed-off-by: David Feng <fenghua@phytium.com.cn>
---
 MAINTAINERS                             |    4 +
 arch/arm/config.mk                      |    4 +
 arch/arm/cpu/armv8/Makefile             |   56 +++++++
 arch/arm/cpu/armv8/cache.S              |  145 ++++++++++++++++
 arch/arm/cpu/armv8/cache_v8.c           |  275 +++++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/config.mk            |   31 ++++
 arch/arm/cpu/armv8/cpu.c                |   68 ++++++++
 arch/arm/cpu/armv8/crt0.S               |  130 +++++++++++++++
 arch/arm/cpu/armv8/exceptions.S         |  173 +++++++++++++++++++
 arch/arm/cpu/armv8/interrupts.c         |  158 ++++++++++++++++++
 arch/arm/cpu/armv8/relocate.S           |   73 ++++++++
 arch/arm/cpu/armv8/start.S              |  253 ++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/timer.c              |   97 +++++++++++
 arch/arm/cpu/armv8/tlb.S                |   45 +++++
 arch/arm/cpu/armv8/u-boot.lds           |   83 ++++++++++
 arch/arm/include/asm/arch-armv8/gpio.h  |   26 +++
 arch/arm/include/asm/arch-armv8/mmu.h   |  117 +++++++++++++
 arch/arm/include/asm/byteorder.h        |   12 ++
 arch/arm/include/asm/cache.h            |    5 +
 arch/arm/include/asm/config.h           |   10 ++
 arch/arm/include/asm/global_data.h      |    6 +-
 arch/arm/include/asm/io.h               |   15 +-
 arch/arm/include/asm/macro.h            |   34 ++++
 arch/arm/include/asm/posix_types.h      |   17 ++
 arch/arm/include/asm/proc-armv/ptrace.h |   37 +++++
 arch/arm/include/asm/proc-armv/system.h |   59 ++++++-
 arch/arm/include/asm/system.h           |   78 +++++++++
 arch/arm/include/asm/types.h            |    4 +
 arch/arm/include/asm/u-boot.h           |    4 +
 arch/arm/include/asm/unaligned.h        |    2 +-
 arch/arm/lib/Makefile                   |    8 +
 arch/arm/lib/board.c                    |   16 +-
 arch/arm/lib/bootm.c                    |   20 ++-
 common/image.c                          |    1 +
 doc/README.armv8                        |   10 ++
 examples/standalone/stubs.c             |   15 ++
 include/image.h                         |    1 +
 37 files changed, 2080 insertions(+), 12 deletions(-)
 create mode 100644 arch/arm/cpu/armv8/Makefile
 create mode 100644 arch/arm/cpu/armv8/cache.S
 create mode 100644 arch/arm/cpu/armv8/cache_v8.c
 create mode 100644 arch/arm/cpu/armv8/config.mk
 create mode 100644 arch/arm/cpu/armv8/cpu.c
 create mode 100644 arch/arm/cpu/armv8/crt0.S
 create mode 100644 arch/arm/cpu/armv8/exceptions.S
 create mode 100644 arch/arm/cpu/armv8/interrupts.c
 create mode 100644 arch/arm/cpu/armv8/relocate.S
 create mode 100644 arch/arm/cpu/armv8/start.S
 create mode 100644 arch/arm/cpu/armv8/timer.c
 create mode 100644 arch/arm/cpu/armv8/tlb.S
 create mode 100644 arch/arm/cpu/armv8/u-boot.lds
 create mode 100644 arch/arm/include/asm/arch-armv8/gpio.h
 create mode 100644 arch/arm/include/asm/arch-armv8/mmu.h
 create mode 100644 doc/README.armv8
Rob Herring - Sept. 10, 2013, 10:32 p.m.
On 09/10/2013 03:12 AM, fenghua@phytium.com.cn wrote:
> From: David Feng <fenghua@phytium.com.cn>
> 
> Signed-off-by: David Feng <fenghua@phytium.com.cn>
> ---
>  MAINTAINERS                             |    4 +
>  arch/arm/config.mk                      |    4 +
>  arch/arm/cpu/armv8/Makefile             |   56 +++++++
>  arch/arm/cpu/armv8/cache.S              |  145 ++++++++++++++++
>  arch/arm/cpu/armv8/cache_v8.c           |  275 +++++++++++++++++++++++++++++++
>  arch/arm/cpu/armv8/config.mk            |   31 ++++
>  arch/arm/cpu/armv8/cpu.c                |   68 ++++++++
>  arch/arm/cpu/armv8/crt0.S               |  130 +++++++++++++++
>  arch/arm/cpu/armv8/exceptions.S         |  173 +++++++++++++++++++
>  arch/arm/cpu/armv8/interrupts.c         |  158 ++++++++++++++++++
>  arch/arm/cpu/armv8/relocate.S           |   73 ++++++++
>  arch/arm/cpu/armv8/start.S              |  253 ++++++++++++++++++++++++++++
>  arch/arm/cpu/armv8/timer.c              |   97 +++++++++++
>  arch/arm/cpu/armv8/tlb.S                |   45 +++++
>  arch/arm/cpu/armv8/u-boot.lds           |   83 ++++++++++
>  arch/arm/include/asm/arch-armv8/gpio.h  |   26 +++
>  arch/arm/include/asm/arch-armv8/mmu.h   |  117 +++++++++++++
>  arch/arm/include/asm/byteorder.h        |   12 ++
>  arch/arm/include/asm/cache.h            |    5 +
>  arch/arm/include/asm/config.h           |   10 ++
>  arch/arm/include/asm/global_data.h      |    6 +-
>  arch/arm/include/asm/io.h               |   15 +-
>  arch/arm/include/asm/macro.h            |   34 ++++
>  arch/arm/include/asm/posix_types.h      |   17 ++
>  arch/arm/include/asm/proc-armv/ptrace.h |   37 +++++
>  arch/arm/include/asm/proc-armv/system.h |   59 ++++++-
>  arch/arm/include/asm/system.h           |   78 +++++++++
>  arch/arm/include/asm/types.h            |    4 +
>  arch/arm/include/asm/u-boot.h           |    4 +
>  arch/arm/include/asm/unaligned.h        |    2 +-
>  arch/arm/lib/Makefile                   |    8 +
>  arch/arm/lib/board.c                    |   16 +-
>  arch/arm/lib/bootm.c                    |   20 ++-
>  common/image.c                          |    1 +
>  doc/README.armv8                        |   10 ++
>  examples/standalone/stubs.c             |   15 ++
>  include/image.h                         |    1 +
>  37 files changed, 2080 insertions(+), 12 deletions(-)

This patch is a bit large. It would be better to split into preparation
patches and one to add armv8.

>  create mode 100644 arch/arm/cpu/armv8/Makefile
>  create mode 100644 arch/arm/cpu/armv8/cache.S
>  create mode 100644 arch/arm/cpu/armv8/cache_v8.c
>  create mode 100644 arch/arm/cpu/armv8/config.mk
>  create mode 100644 arch/arm/cpu/armv8/cpu.c
>  create mode 100644 arch/arm/cpu/armv8/crt0.S
>  create mode 100644 arch/arm/cpu/armv8/exceptions.S
>  create mode 100644 arch/arm/cpu/armv8/interrupts.c
>  create mode 100644 arch/arm/cpu/armv8/relocate.S
>  create mode 100644 arch/arm/cpu/armv8/start.S
>  create mode 100644 arch/arm/cpu/armv8/timer.c
>  create mode 100644 arch/arm/cpu/armv8/tlb.S
>  create mode 100644 arch/arm/cpu/armv8/u-boot.lds
>  create mode 100644 arch/arm/include/asm/arch-armv8/gpio.h
>  create mode 100644 arch/arm/include/asm/arch-armv8/mmu.h
>  create mode 100644 doc/README.armv8
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 6e50fc4..d142307 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1095,6 +1095,10 @@ Sergey Yanovich <ynvich@gmail.com>
>  
>  	lp8x4x		xscale/pxa
>  
> +David Feng <fenghua@phytium.com.cn>
> +
> +	vexpress_aemv8a		ARM ARMV8 (Quad Core)
> +

This belongs in the patch adding the board support.

>  -------------------------------------------------------------------------
>  
>  Unknown / orphaned boards:
> diff --git a/arch/arm/config.mk b/arch/arm/config.mk
> index ce3903b..f1c6a7b 100644
> --- a/arch/arm/config.mk
> +++ b/arch/arm/config.mk
> @@ -74,7 +74,9 @@ endif
>  endif
>  
>  # needed for relocation
> +ifndef CONFIG_ARMV8
>  LDFLAGS_u-boot += -pie
> +endif
>  
>  #
>  # FIXME: binutils versions < 2.22 have a bug in the assembler where
> @@ -95,6 +97,8 @@ endif
>  endif
>  
>  # check that only R_ARM_RELATIVE relocations are generated
> +ifndef CONFIG_ARMV8
>  ifneq ($(CONFIG_SPL_BUILD),y)
>  ALL-y	+= checkarmreloc
>  endif
> +endif
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> new file mode 100644
> index 0000000..55fd365
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -0,0 +1,56 @@
> +#
> +# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
> +#
> +# See file CREDITS for list of people who contributed to this
> +# project.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation; either version 2 of
> +# the License, or (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> +# MA 02111-1307 USA

Shouldn't this and several other places use the new license tag.

> +#
> +
> +include $(TOPDIR)/config.mk
> +
> +LIB	= $(obj)lib$(CPU).o
> +
> +START	:= start.o
> +
> +COBJS	+= cpu.o
> +COBJS	+= timer.o
> +COBJS	+= cache_v8.o
> +COBJS	+= interrupts.o
> +
> +SOBJS	+= crt0.o
> +SOBJS	+= relocate.o
> +SOBJS	+= exceptions.o
> +SOBJS	+= cache.o
> +SOBJS	+= tlb.o
> +
> +SRCS	:= $(START:.o=.S) $(COBJS:.o=.c)
> +OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
> +START	:= $(addprefix $(obj),$(START))
> +
> +all:	$(obj).depend $(START) $(LIB)
> +
> +$(LIB):	$(OBJS)
> +	$(call cmd_link_o_target, $(OBJS))
> +
> +#########################################################################
> +
> +# defines $(obj).depend target
> +include $(SRCTREE)/rules.mk
> +
> +sinclude $(obj).depend
> +
> +#########################################################################
> diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
> new file mode 100644
> index 0000000..050c1c0
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cache.S
> @@ -0,0 +1,145 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * void __asm_flush_dcache_level(level)
> + *
> + * clean and invalidate one level cache.
> + *
> + * x0: cache level
> + * x1~x9: clobbered
> + */
> +ENTRY(__asm_flush_dcache_level)
> +	lsl	x1, x0, #1
> +	msr	csselr_el1, x1		/* select cache level */
> +	isb				/* isb to sync the new csselr & ccsidr */
> +	mrs	x6, ccsidr_el1		/* read the new ccsidr */
> +	and	x2, x6, #7		/* x2 <- length of the cache lines */
> +	add	x2, x2, #4		/* add 4 (line length offset) */
> +	mov	x3, #0x3ff
> +	and	x3, x3, x6, lsr #3	/* x3 <- maximum number of way size */
> +	clz	w5, w3			/* bit position of way size */
> +	mov	x4, #0x7fff
> +	and	x4, x4, x1, lsr #13	/* x4 <- max number of the set size */
> +	/* x1 <- cache level << 1 */
> +	/* x2 <- line length offset */
> +	/* x3 <- number of cache ways */
> +	/* x4 <- number of cache sets */
> +	/* x5 <- bit position of way size */
> +
> +loop_set:
> +	mov	x6, x3			/* create working copy of way size */
> +loop_way:
> +	lsl	x7, x6, x5
> +	orr	x9, x0, x7		/* map way and level to cisw value */
> +	lsl	x7, x4, x2
> +	orr	x9, x9, x7		/* map set number to cisw value */
> +	dc	cisw, x9		/* clean & invalidate by set/way */
> +	subs	x6, x6, #1		/* decrement the way */
> +	b.ge	loop_way
> +	subs	x4, x4, #1		/* decrement the set */
> +	b.ge	loop_set
> +
> +	ret
> +ENDPROC(__asm_flush_dcache_level)
> +
> +/*
> + * void __asm_flush_dcache_all(void)
> + *
> + * clean and invalidate all data cache by SET/WAY.
> + */
> +ENTRY(__asm_flush_dcache_all)
> +	dsb	sy
> +	mov	x15, lr
> +	mrs	x10, clidr_el1		/* read clidr */
> +	lsr	x11, x10, #24
> +	and	x11, x11, #0x7		/* x11 <- loc */
> +	cbz	x11, finished		/* if loc is 0, no need to clean */
> +	mov	x0, #0			/* start flush at cache level 0 */
> +	/* x0  <- cache level */
> +	/* x10 <- clidr_el1 */
> +	/* x11 <- loc */
> +
> +loop_level:
> +	lsl	x1, x0, #1
> +	add	x1, x1, x0		/* x1 <- 3x cache level */
> +	lsr	x1, x10, x1
> +	and	x1, x1, #7		/* x1 <- cache type */
> +	cmp	x1, #2
> +	b.lt	skip			/* skip if no cache or icache */
> +	bl	__asm_flush_dcache_level
> +skip:
> +	add	x0, x0, #1		/* increment cache level */
> +	cmp	x11, x0
> +	b.gt	loop_level
> +
> +finished:
> +	mov	x0, #0
> +	msr	csselr_el1, x0		/* switch back to cache level 0 */
> +	dsb	sy
> +	isb
> +	mov	lr, x15
> +	ret
> +ENDPROC(__asm_flush_dcache_all)
> +
> +/*
> + * void __asm_flush_dcache_range(start, end)
> + *
> + * clean & invalidate data cache in the range
> + *
> + * x0: start address
> + * x1: end address
> + */
> +ENTRY(__asm_flush_dcache_range)
> +	mrs	x3, ctr_el0		/* read CTR */
> +	lsr	x3, x3, #16
> +	and	x3, x3, #0xf		/* cache line size encoding */
> +	mov	x2, #4			/* bytes per word */
> +	lsl	x2, x2, x3		/* actual cache line size */
> +
> +	/* x2 <- minimal cache line size in cache system */
> +	sub	x3, x2, #1
> +	bic	x0, x0, x3
> +1:      dc	civac, x0		/* clean & invalidate D/unified line */
> +	add	x0, x0, x2
> +	cmp	x0, x1
> +	b.lo	1b
> +	dsb	sy
> +	ret
> +ENDPROC(__asm_flush_dcache_range)
> +
> +/*
> + * void __asm_invalidate_icache_all(void)
> + *
> + * invalidate all instruction cache entries.
> + */
> +ENTRY(__asm_invalidate_icache_all)
> +	ic	ialluis
> +	isb	sy
> +	ret
> +ENDPROC(__asm_invalidate_icache_all)
> diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
> new file mode 100644
> index 0000000..56a1489
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cache_v8.c
> @@ -0,0 +1,275 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <common.h>
> +#include <asm/system.h>
> +#include <asm/arch/mmu.h>
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +#ifndef CONFIG_SYS_DCACHE_OFF
> +
> +static void set_pgtable_section(u64 section, u64 memory_type)
> +{
> +	u64 *page_table = (u64 *)gd->arch.tlb_addr;
> +	u64 value;
> +
> +	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
> +	value |= PMD_ATTRINDX(memory_type);
> +	page_table[section] = value;
> +}
> +
> +/* to activate the MMU we need to set up virtual memory */
> +static inline void mmu_setup(void)
> +{
> +	int i, j, el;
> +	bd_t *bd = gd->bd;
> +
> +	/* Setup an identity-mapping for all spaces */
> +	for (i = 0; i < (PAGE_SIZE >> 3); i++)
> +		set_pgtable_section(i, MT_DEVICE_nGnRnE);
> +
> +	/* Setup an identity-mapping for all RAM space */
> +	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
> +		ulong start = bd->bi_dram[i].start;
> +		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
> +		for (j = start >> SECTION_SHIFT;
> +		     j < end >> SECTION_SHIFT; j++) {
> +			set_pgtable_section(j, MT_NORMAL);
> +		}
> +	}
> +
> +	/* load TTBR0 */
> +	el = curent_el();
> +	if (el == 1)
> +		asm volatile("msr ttbr0_el1, %0"
> +			     : : "r" (gd->arch.tlb_addr) : "memory");
> +	else if (el == 2)
> +		asm volatile("msr ttbr0_el2, %0"
> +			     : : "r" (gd->arch.tlb_addr) : "memory");
> +	else
> +		panic("Not Supported Exception Level");
> +
> +	/* enable the mmu */
> +	set_sctlr(get_sctlr() | CR_M);
> +}
> +
> +/*
> + * Performs a invalidation of the entire data cache
> + * at all levels
> + */
> +void invalidate_dcache_all(void)
> +{
> +	__asm_flush_dcache_all();
> +	v8_outer_cache_inval_all();
> +}
> +
> +/*
> + * Performs a clean & invalidation of the entire data cache
> + * at all levels
> + */
> +void flush_dcache_all(void)
> +{
> +	__asm_flush_dcache_all();
> +	v8_outer_cache_flush_all();
> +}
> +
> +/*
> + * Invalidates range in all levels of D-cache/unified cache used:
> + * Affects the range [start, stop - 1]
> + */
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	__asm_flush_dcache_range(start, stop);
> +	v8_outer_cache_inval_range(start, stop);
> +}
> +
> +/*
> + * Flush range(clean & invalidate) from all levels of D-cache/unified
> + * cache used:
> + * Affects the range [start, stop - 1]
> + */
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	__asm_flush_dcache_range(start, stop);
> +	v8_outer_cache_flush_range(start, stop);
> +}
> +
> +void dcache_enable(void)
> +{
> +	uint32_t sctlr;
> +
> +	sctlr = get_sctlr();
> +
> +	/* The data cache is not active unless the mmu is enabled too */
> +	if (!(sctlr & CR_M)) {
> +		v8_outer_cache_enable();
> +		invalidate_dcache_all();
> +		__asm_invalidate_tlb_all();
> +		mmu_setup();
> +	}
> +
> +	set_sctlr(sctlr | CR_C);
> +}
> +
> +void dcache_disable(void)
> +{
> +	uint32_t sctlr;
> +
> +	sctlr = get_sctlr();
> +
> +	/* if cache isn't enabled no need to disable */
> +	if (!(sctlr & CR_C))
> +		return;
> +
> +	set_sctlr(sctlr & ~(CR_C|CR_M));
> +
> +	flush_dcache_all();
> +	__asm_invalidate_tlb_all();
> +}
> +
> +int dcache_status(void)
> +{
> +	return (get_sctlr() & CR_C) != 0;
> +}
> +
> +#else	/* CONFIG_SYS_DCACHE_OFF */
> +
> +void invalidate_dcache_all(void)
> +{
> +}
> +
> +void flush_dcache_all(void)
> +{
> +}
> +
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void dcache_enable(void)
> +{
> +}
> +
> +void dcache_disable(void)
> +{
> +}
> +
> +int dcache_status(void)
> +{
> +	return 0;
> +}
> +
> +#endif	/* CONFIG_SYS_DCACHE_OFF */
> +
> +#ifndef CONFIG_SYS_ICACHE_OFF
> +
> +void icache_enable(void)
> +{
> +	set_sctlr(get_sctlr() | CR_I);
> +}
> +
> +void icache_disable(void)
> +{
> +	set_sctlr(get_sctlr() & ~CR_I);
> +}
> +
> +int icache_status(void)
> +{
> +	return (get_sctlr() & CR_I) != 0;
> +}
> +
> +void invalidate_icache_all(void)
> +{
> +	__asm_invalidate_icache_all();
> +}
> +
> +#else	/* CONFIG_SYS_ICACHE_OFF */
> +
> +void icache_enable(void)
> +{
> +}
> +
> +void icache_disable(void)
> +{
> +}
> +
> +int icache_status(void)
> +{
> +	return 0;
> +}
> +
> +void invalidate_icache_all(void)
> +{
> +}
> +
> +#endif	/* CONFIG_SYS_ICACHE_OFF */
> +
> +/*
> + * Enable dCache & iCache, whether cache is actually enabled
> + * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
> + */
> +void enable_caches(void)
> +{
> +	icache_enable();
> +	dcache_enable();
> +}
> +
> +/*
> + * Flush range from all levels of d-cache/unified-cache used:
> + * Affects the range [start, start + size - 1]
> + */
> +void flush_cache(unsigned long start, unsigned long size)
> +{
> +	flush_dcache_range(start, start + size);
> +}
> +
> +/*
> + * Stub implementations for outer cache operations
> + */
> +void __v8_outer_cache_enable(void) {}
> +void v8_outer_cache_enable(void)
> +	__attribute__((weak, alias("__v8_outer_cache_enable")));

These can just be:

void __weak v8_outer_cache_enable(void) {}

> +
> +void __v8_outer_cache_disable(void) {}
> +void v8_outer_cache_disable(void)
> +	__attribute__((weak, alias("__v8_outer_cache_disable")));
> +
> +void __v8_outer_cache_flush_all(void) {}
> +void v8_outer_cache_flush_all(void)
> +	__attribute__((weak, alias("__v8_outer_cache_flush_all")));
> +
> +void __v8_outer_cache_inval_all(void) {}
> +void v8_outer_cache_inval_all(void)
> +	__attribute__((weak, alias("__v8_outer_cache_inval_all")));
> +
> +void __v8_outer_cache_flush_range(u64 start, u64 end) {}
> +void v8_outer_cache_flush_range(u64 start, u64 end)
> +	__attribute__((weak, alias("__v8_outer_cache_flush_range")));
> +
> +void __v8_outer_cache_inval_range(u64 start, u64 end) {}
> +void v8_outer_cache_inval_range(u64 start, u64 end)
> +	__attribute__((weak, alias("__v8_outer_cache_inval_range")));
> diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
> new file mode 100644
> index 0000000..aae2170
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/config.mk
> @@ -0,0 +1,31 @@
> +#
> +# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
> +#
> +# See file CREDITS for list of people who contributed to this
> +# project.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation; either version 2 of
> +# the License, or (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> +# MA 02111-1307 USA
> +#
> +PLATFORM_RELFLAGS += -fno-common -ffixed-x18
> +
> +# SEE README.arm-unaligned-accesses
> +PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
> +PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
> +
> +PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
> +PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
> +PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
> +PLATFORM_CPPFLAGS += -fpic
> diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
> new file mode 100644
> index 0000000..76e76b6
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cpu.c
> @@ -0,0 +1,68 @@
> +/*
> + * (C) Copyright 2008 Texas Instruments
> + *
> + * (C) Copyright 2002
> + * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
> + * Marius Groeger <mgroeger@sysgo.de>
> + *
> + * (C) Copyright 2002
> + * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +/*
> + * CPU specific code
> + */
> +
> +#include <common.h>
> +#include <command.h>
> +#include <asm/system.h>
> +#include <linux/compiler.h>
> +
> +void __weak cpu_cache_initialization(void){}
> +
> +int cleanup_before_linux(void)
> +{
> +	/*
> +	 * this function is called just before we call linux
> +	 * it prepares the processor for linux
> +	 *
> +	 * we turn off caches etc ...
> +	 */
> +#ifndef CONFIG_SPL_BUILD
> +	disable_interrupts();
> +#endif
> +
> +	/*
> +	 * Turn off I-cache and invalidate it
> +	 */
> +	icache_disable();
> +	invalidate_icache_all();
> +
> +	/*
> +	 * turn off D-cache
> +	 * dcache_disable() in turn flushes the d-cache and disables MMU
> +	 */
> +	dcache_disable();
> +	v8_outer_cache_disable();
> +
> +	/*
> +	 * After D-cache is flushed and before it is disabled there may
> +	 * be some new valid entries brought into the cache. We are sure
> +	 * that these lines are not dirty and will not affect our execution.
> +	 * (because unwinding the call-stack and setting a bit in CP15 SCTRL
> +	 * is all we did during this. We have not pushed anything on to the
> +	 * stack. Neither have we affected any static data)
> +	 * So just invalidate the entire d-cache again to avoid coherency
> +	 * problems for kernel
> +	 */
> +	invalidate_dcache_all();
> +
> +	/*
> +	 * Some CPU need more cache attention before starting the kernel.
> +	 */
> +	cpu_cache_initialization();
> +
> +	return 0;
> +}
> diff --git a/arch/arm/cpu/armv8/crt0.S b/arch/arm/cpu/armv8/crt0.S
> new file mode 100644
> index 0000000..97d6806
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/crt0.S
> @@ -0,0 +1,130 @@
> +/*
> + * crt0 - C-runtime startup Code for AArch64 U-Boot
> + *
> + * Copyright (c) 2013  David Feng <fenghua@phytium.com.cn>
> + *
> + * Copyright (c) 2012  Albert ARIBAUD <albert.u.boot@aribaud.net>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <config.h>
> +#include <asm-offsets.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * This file handles the target-independent stages of the U-Boot
> + * start-up where a C runtime environment is needed. Its entry point
> + * is _main and is branched into from the target's start.S file.
> + *
> + * _main execution sequence is:
> + *
> + * 1. Set up initial environment for calling board_init_f().
> + *    This environment only provides a stack and a place to store
> + *    the GD ('global data') structure, both located in some readily
> + *    available RAM (SRAM, locked cache...). In this context, VARIABLE
> + *    global data, initialized or not (BSS), are UNAVAILABLE; only
> + *    CONSTANT initialized data are available.
> + *
> + * 2. Call board_init_f(). This function prepares the hardware for
> + *    execution from system RAM (DRAM, DDR...) As system RAM may not
> + *    be available yet, board_init_f() must use the current GD to
> + *    store any data which must be passed on to later stages. These
> + *    data include the relocation destination, the future stack, and
> + *    the future GD location.
> + *
> + * (the following applies only to non-SPL builds)
> + *
> + * 3. Set up intermediate environment where the stack and GD are the
> + *    ones allocated by board_init_f() in system RAM, but BSS and
> + *    initialized non-const data are still not available.
> + *
> + * 4. Call relocate_code(). This function relocates U-Boot from its
> + *    current location into the relocation destination computed by
> + *    board_init_f().
> + *
> + * 5. Set up final environment for calling board_init_r(). This
> + *    environment has BSS (initialized to 0), initialized non-const
> + *    data (initialized to their intended value), and stack in system
> + *    RAM. GD has retained values set by board_init_f(). Some CPUs
> + *    have some work left to do at this point regarding memory, so
> + *    call c_runtime_cpu_setup.
> + *
> + * 6. Branch to board_init_r().
> + */
> +
> +ENTRY(_main)
> +
> +/*
> + * Set up initial C runtime environment and call board_init_f(0).
> + */
> +	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
> +	sub	x0, x0, #GD_SIZE	/* allocate one GD above SP */
> +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> +	mov	x18, sp			/* GD is above SP */
> +	mov	x0, #0
> +	bl	board_init_f
> +
> +/*
> + * Set up intermediate environment (new sp and gd) and call
> + * relocate_code(addr_moni). Trick here is that we'll return
> + * 'here' but relocated.
> + */
> +	ldr	x0, [x18, #GD_START_ADDR_SP]	/* x0 <- gd->start_addr_sp */
> +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> +	ldr	x18, [x18, #GD_BD]		/* x18 <- gd->bd */
> +	sub	x18, x18, #GD_SIZE		/* new GD is below bd */
> +
> +	adr	lr, relocation_return
> +	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
> +	add	lr, lr, x9	/* new return address after relocation */
> +	ldr	x0, [x18, #GD_RELOCADDR]	/* x0 <- gd->relocaddr */
> +	b	relocate_code
> +
> +relocation_return:
> +
> +/*
> + * Set up final (full) environment
> + */
> +	bl	c_runtime_cpu_setup		/* still call old routine */
> +
> +/*
> + * Clear BSS section
> + */
> +	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
> +	ldr	x0, =__bss_start
> +	add	x0, x0, x9			/* x0 <- __bss_start in RAM */
> +	ldr	x1, =__bss_end
> +	add	x1, x1, x9			/* x1 <- __bss_end in RAM */
> +	mov	x2, #0
> +clear_loop:
> +	str	x2, [x0]
> +	add	x0, x0, #8
> +	cmp	x0, x1
> +	b.lo	clear_loop
> +
> +	/* call board_init_r(gd_t *id, ulong dest_addr) */
> +	mov	x0, x18				/* gd_t */
> +	ldr	x1, [x18, #GD_RELOCADDR]	/* dest_addr */
> +	b	board_init_r			/* PC relative jump */
> +
> +	/* NOTREACHED - board_init_r() does not return */
> +
> +ENDPROC(_main)
> diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
> new file mode 100644
> index 0000000..2a3962b
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/exceptions.S
> @@ -0,0 +1,173 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <asm/ptrace.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * Enter Exception.
> + * This will save the processor state that is X0~X29/LR/SP/ELR/PSTATE
> + * to the stack frame.
> + */
> +#define	EXCEPTION_ENTRY				\
> +	sub	sp, sp, S_FRAME_SIZE - S_LR	;\
> +	push	x28, x29			;\
> +	push	x26, x27			;\
> +	push	x24, x25			;\
> +	push	x22, x23			;\
> +	push	x20, x21			;\
> +	push	x18, x19			;\
> +	push	x16, x17			;\
> +	push	x14, x15			;\
> +	push	x12, x13			;\
> +	push	x10, x11			;\
> +	push	x8, x9				;\
> +	push	x6, x7				;\
> +	push	x4, x5				;\
> +	push	x2, x3				;\
> +	push	x0, x1				;\
> +	add	x21, sp, S_FRAME_SIZE		;\
> +						;\
> +	/* Could be running at EL1 or EL2 */	;\
> +	mrs	x0, CurrentEL			;\
> +	cmp	x0, 0x4				;\
> +	b.eq	1f				;\
> +	cmp	x0, 0x8				;\
> +	b.eq	2f				;\
> +	b	3f				;\
> +1:	mrs	x22, elr_el1			;\
> +	mrs	x23, spsr_el1			;\
> +	mrs	x1, esr_el1			;\
> +	b	3f				;\
> +2:	mrs	x22, elr_el2			;\
> +	mrs	x23, spsr_el2			;\
> +	mrs	x1, esr_el2			;\
> +3:						;\
> +	stp	lr, x21, [sp, S_LR]		;\
> +	stp	x22, x23, [sp, S_PC]		;\
> +	mov	x0, sp
> +
> +/*
> + * Exit Exception.
> + * This will restore the processor state that is X0~X29/LR/SP/ELR/PSTATE
> + * from the stack frame and return from exception.
> + */
> +#define	EXCEPTION_EXIT				\
> +	ldp	x21, x22, [sp, S_PC]		;\
> +						;\
> +	/* Could be running at EL1 or EL2 */	;\
> +	mrs	x0, CurrentEL			;\
> +	cmp	x0, 0x4				;\
> +	b.eq	1f				;\
> +	cmp	x0, 0x8				;\
> +	b.eq	2f				;\
> +	b	3f				;\
> +1:	msr	elr_el1, x21			;\
> +	msr	spsr_el1, x22			;\
> +	b	3f				;\
> +2:	msr	elr_el2, x21			;\
> +	msr	spsr_el2, x22			;\
> +3:						;\
> +	pop	x0, x1				;\
> +	pop	x2, x3				;\
> +	pop	x4, x5				;\
> +	pop	x6, x7				;\
> +	pop	x8, x9				;\
> +	pop	x10, x11			;\
> +	pop	x12, x13			;\
> +	pop	x14, x15			;\
> +	pop	x16, x17			;\
> +	pop	x18, x19			;\
> +	pop	x20, x21			;\
> +	pop	x22, x23			;\
> +	pop	x24, x25			;\
> +	pop	x26, x27			;\
> +	pop	x28, x29			;\
> +	ldr	lr, [sp], S_FRAME_SIZE - S_LR	;\
> +	eret
> +
> +/*
> + * Exception vectors.
> + */
> +	.align	11
> +	.globl	vectors
> +vectors:
> +	.align	7
> +	b	_do_bad_sync	/* Current EL Synchronous Thread */
> +
> +	.align	7
> +	b	_do_bad_irq	/* Current EL IRQ Thread */
> +
> +	.align	7
> +	b	_do_bad_fiq	/* Current EL FIQ Thread */
> +
> +	.align	7
> +	b	_do_bad_error	/* Current EL Error Thread */
> +
> +	.align	7
> +	b	_do_sync	/* Current EL Synchronous Handler */
> +
> +	.align	7
> +	b	_do_irq		/* Current EL IRQ Handler */
> +
> +	.align	7
> +	b	_do_fiq		/* Current EL FIQ Handler */
> +
> +	.align	7
> +	b	_do_error	/* Current EL Error Handler */
> +
> +
> +_do_bad_sync:
> +	EXCEPTION_ENTRY
> +	bl	do_bad_sync
> +
> +_do_bad_irq:
> +	EXCEPTION_ENTRY
> +	bl	do_bad_irq
> +
> +_do_bad_fiq:
> +	EXCEPTION_ENTRY
> +	bl	do_bad_fiq
> +
> +_do_bad_error:
> +	EXCEPTION_ENTRY
> +	bl	do_bad_error
> +
> +_do_sync:
> +	EXCEPTION_ENTRY
> +	bl	do_sync
> +
> +_do_irq:
> +	EXCEPTION_ENTRY
> +	bl	do_irq
> +
> +_do_fiq:
> +	EXCEPTION_ENTRY
> +	bl	do_fiq
> +
> +_do_error:
> +	EXCEPTION_ENTRY
> +	bl	do_error
> diff --git a/arch/arm/cpu/armv8/interrupts.c b/arch/arm/cpu/armv8/interrupts.c
> new file mode 100644
> index 0000000..7a4e9d9
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/interrupts.c
> @@ -0,0 +1,158 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <common.h>
> +
> +
> +#ifdef CONFIG_USE_IRQ

No ARM board actually supports this option and you just define the same
functions, so the ifdef is pointless.

> +int interrupt_init(void)
> +{
> +	return 0;
> +}
> +
> +/* enable IRQ interrupts */
> +void enable_interrupts(void)
> +{
> +}
> +
> +/*
> + * disable IRQ/FIQ interrupts
> + * returns true if interrupts had been enabled before we disabled them
> + */
> +int disable_interrupts(void)
> +{
> +	return 0;
> +}
> +#else
> +int interrupt_init(void)
> +{
> +	return 0;
> +}
> +
> +void enable_interrupts(void)
> +{
> +	return;
> +}
> +int disable_interrupts(void)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_USE_IRQ */
> +
> +void show_regs(struct pt_regs *regs)
> +{
> +	int i;
> +
> +	printf("PC:     %lx\n", regs->pc);
> +	printf("LR:     %lx\n", regs->regs[30]);
> +	printf("PSTATE: %08lx\n", regs->pstate);
> +	printf("SP :    %lx\n", regs->sp);
> +	for (i = 0; i < 30; i += 2)
> +		printf("x%-2d: %016lx x%-2d: %016lx\n",
> +		       i, regs->regs[i], i+1, regs->regs[i+1]);
> +	printf("\n");
> +}
> +
> +/*
> + * do_bad_sync handles the impossible case in the Synchronous Abort vector.
> + */
> +void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_irq handles the impossible case in the Irq vector.
> + */
> +void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_fiq handles the impossible case in the Fiq vector.
> + */
> +void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_error handles the impossible case in the Error vector.
> + */
> +void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_sync handles the Synchronous Abort exception.
> + */
> +void do_sync(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_irq handles the Irq exception.
> + */
> +void do_irq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Irq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_fiq handles the Fiq exception.
> + */
> +void do_fiq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Fiq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_error handles the Error exception.
> + * Errors are more likely to be processor specific, so the handler
> + * is defined with the weak attribute and can be redefined in
> + * processor specific code.
> + */
> +void __do_error(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Error\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +void do_error(struct pt_regs *pt_regs, unsigned int esr)
> +	__attribute__((weak, alias("__do_error")));

Just declare the function __weak.

> diff --git a/arch/arm/cpu/armv8/relocate.S b/arch/arm/cpu/armv8/relocate.S
> new file mode 100644
> index 0000000..6553d6d
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/relocate.S
> @@ -0,0 +1,73 @@
> +/*
> + * relocate - common relocation function for AArch64 U-Boot
> + *
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * void relocate_code (addr_moni)
> + *
> + * This function relocates the monitor code.
> + *
> + * NOTE:
> + * GOT is used and configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
> + */
> +ENTRY(relocate_code)
> +	/*
> +	 * Copy u-boot from flash to RAM
> +	 */
> +	ldr	x1, =__image_copy_start	/* x1 <- copy source */
> +	cmp	x1, x0
> +	b.eq	relocate_done		/* skip relocation */
> +	mov	x2, x0			/* x2 <- copy destination */
> +	ldr	x3, =__image_copy_end	/* x3 <- source end address */
> +
> +copy_loop:
> +	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
> +	stp	x10, x11, [x2], #16	/* copy to   target address [x2] */
> +	cmp	x1, x3			/* until source end address [x3] */
> +	b.lo	copy_loop
> +
> +	/*
> +	 * Fix .reloc relocations
> +	 */
> +	ldr	x9, [x18, #GD_RELOC_OFF]/* x9 <- relocation offset */
> +	ldr	x1, =__rel_got_start	/* x1 <- rel got start ofs */
> +	add	x1, x1, x9		/* x1 <- rel got start in RAM */
> +	ldr	x2, =__rel_got_end	/* x2 <- rel got end ofs */
> +	add	x2, x2, x9		/* x2 <- rel got end in RAM */
> +fixloop:
> +	ldr	x10, [x1]
> +	add	x10, x10, x9		/* x10 <- address to be fixed up */
> +	str	x10, [x1]
> +	add	x1, x1, #8		/* each gotn entry is 8 bytes */
> +	cmp	x1, x2
> +	b.lo	fixloop
> +
> +relocate_done:
> +	ret
> +ENDPROC(relocate_code)
> diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
> new file mode 100644
> index 0000000..a59b711
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/start.S
> @@ -0,0 +1,253 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +#include <asm/arch/mmu.h>
> +
> +/*************************************************************************
> + *
> + * Startup Code (reset vector)
> + *
> + *************************************************************************/
> +
> +.globl _start
> +_start:
> +	b	reset
> +
> +	.align 3
> +
> +.globl _TEXT_BASE
> +_TEXT_BASE:
> +	.quad	CONFIG_SYS_TEXT_BASE
> +
> +/*
> + * These are defined in the linker script.
> + */
> +.globl	_end_ofs
> +_end_ofs:
> +	.quad	_end - _start
> +
> +.globl	_bss_start_ofs
> +_bss_start_ofs:
> +	.quad	__bss_start - _start
> +
> +.globl	_bss_end_ofs
> +_bss_end_ofs:
> +	.quad	__bss_end - _start
> +
> +reset:
> +	/*
> +	 * EL3 initialisation
> +	 */
> +	mrs	x0, CurrentEL
> +	cmp	x0, #0xc			/* EL3? */
> +	b.ne	reset_nonsecure			/* skip EL3 initialisation */
> +
> +	mov	x0, #0x30			/* RES1 */
> +	orr	x0, x0, #(1 << 0)		/* Non-secure EL1 */
> +	orr	x0, x0, #(1 << 8)		/* HVC enable */
> +	orr	x0, x0, #(1 << 10)		/* 64-bit EL2 */
> +	msr	scr_el3, x0
> +
> +	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
> +
> +	/* Counter frequency initialisation */
> +	ldr	x0, =CONFIG_SYS_CNTFRQ
> +	msr	cntfrq_el0, x0
> +
> +	/* GIC initialisation */
> +	mrs	x0, mpidr_el1
> +	tst	x0, #15
> +	b.ne	1f				/* secondary CPU */
> +
> +	ldr	x1, =GIC_DIST_BASE		/* GICD_CTLR */
> +	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
> +	str	w0, [x1]
> +
> +1:	ldr	x1, =GIC_DIST_BASE + 0x80	/* GICD_IGROUPR */
> +	mov	w0, #~0				/* Grp1 interrupts */
> +	str	w0, [x1], #4
> +	b.ne	2f		/* Only local interrupts for secondary CPUs */
> +	str	w0, [x1], #4
> +	str	w0, [x1], #4
> +
> +2:	ldr	x1, =GIC_CPU_BASE		/* GICC_CTLR */
> +	ldr	w0, [x1]
> +	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
> +	str	w0, [x1]
> +
> +	mov	w0, #1 << 7		/* allow NS access to GICC_PMR */
> +	str	w0, [x1, #4]			/* GICC_PMR */
> +
> +	/* SCTLR_EL2 initialisation */
> +	msr	sctlr_el2, xzr
> +
> +#ifdef CONFIG_BOOTING_EL1
> +	/*
> +	 * EL2 initialization
> +	 */
> +	/* Set EL1 to be 64bit */
> +	mov	x0, #(1 << 31)
> +	msr	hcr_el2, x0
> +
> +	/* Initialize Generic Timers */
> +	mrs	x0, cnthctl_el2
> +	orr	x0, x0, #3		/* Enable EL1 access to timers */
> +	msr	cnthctl_el2, x0
> +	msr	cntvoff_el2, x0			/* Clear virtual offset */
> +	mrs	x0, cntkctl_el1
> +	orr	x0, x0, #3			/* EL0 access to counters */
> +	msr	cntkctl_el1, x0
> +
> +	/* Initialize ID registers */
> +	mrs	x0, midr_el1
> +	mrs	x1, mpidr_el1
> +	msr	vpidr_el2, x0
> +	msr	vmpidr_el2, x1
> +
> +	/* Coprocessor traps */
> +	mov	x0, #0x33ff
> +	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
> +	msr	hstr_el2, xzr			/* Disable CP15 traps to EL2 */
> +
> +	/* SCTLR_EL1 initialization */
> +	mov	x0, #0x0800
> +	movk	x0, #0x30d0, lsl #16
> +	msr	sctlr_el1, x0
> +#endif
> +
> +	/* Return to the EL2_SP1 mode from EL3 */
> +	adr	x0, reset_nonsecure
> +#ifdef CONFIG_BOOTING_EL1
> +	mov	x1, #0x3c5			/* EL1_SP1 | D | A | I | F */
> +#else
> +	mov	x1, #0x3c9			/* EL2_SP2 | D | A | I | F */
> +#endif
> +	msr	elr_el3, x0
> +	msr	spsr_el3, x1
> +	eret
> +
> +	/*
> +	 * MMU Disabled, iCache Disabled, dCache Disabled
> +	 */
> +reset_nonsecure:
> +
> +	/* Initialize vBAR/CPACR_EL1/MDSCR_EL1 */
> +	adr	x0, vectors
> +	switch_el1_el2 x1, 1f, 2f, 3f
> +1:	msr	vbar_el1, x0
> +	mov	x0, #3 << 20
> +	msr	cpacr_el1, x0			/* Enable FP/SIMD */
> +	msr	mdscr_el1, xzr
> +	b	3f
> +2:	msr	vbar_el2, x0
> +3:
> +
> +	/* Cache/BTB/TLB Invalidate */
> +	bl	__asm_flush_dcache_all		/* dCache invalidate */
> +	bl	__asm_invalidate_icache_all	/* iCache invalidate */
> +	bl	__asm_invalidate_tlb_all	/* invalidate I + D TLBs */
> +
> +	/* Processor specific initialisation */
> +#ifndef CONFIG_SKIP_LOWLEVEL_INIT
> +	bl	lowlevel_init
> +#endif
> +
> +	mrs	x0, mpidr_el1
> +	tst	x0, #15
> +	b.eq	master_cpu
> +
> +	/*
> +	 * Secondary CPUs
> +	 */
> +slave_cpu:
> +
> +	wfe
> +	ldr	x1, =SECONDARY_CPU_MAILBOX
> +	ldr	x0, [x1]
> +	cbz	x0, slave_cpu
> +	br	x0			/* branch to the given address */
> +
> +	/*
> +	 * Primary CPU
> +	 */
> +master_cpu:
> +
> +	bl	_main
> +
> +/*-------------------------------------------------------------------------*/
> +
> +ENTRY(c_runtime_cpu_setup)
> +	/* If I-cache is enabled invalidate it */
> +#ifndef CONFIG_SYS_ICACHE_OFF
> +	ic	iallu			/* I+BTB cache invalidate */
> +	isb	sy
> +#endif
> +
> +#ifndef CONFIG_SYS_DCACHE_OFF
> +	/*
> +	 * Memory region attributes:
> +	 *
> +	 *   n = AttrIndx[2:0]
> +	 *                      n       MAIR
> +	 *   DEVICE_nGnRnE      000     00000000
> +	 *   DEVICE_nGnRE       001     00000100
> +	 *   DEVICE_GRE         010     00001100
> +	 *   NORMAL_NC          011     01000100
> +	 *   NORMAL             100     11111111
> +	 */
> +	ldr	x0, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
> +		     MAIR(0x04, MT_DEVICE_nGnRE) | \
> +		     MAIR(0x0c, MT_DEVICE_GRE) | \
> +		     MAIR(0x44, MT_NORMAL_NC) | \
> +		     MAIR(0xff, MT_NORMAL)
> +
> +	/*
> +	 * Set/prepare TCR and TTBR. Using 512GB address range.
> +	 */
> +	ldr     x1, =TCR_T0SZ(VA_BITS) | TCR_FLAGS | TCR_TG0_64K
> +
> +	switch_el1_el2 x2, 1f, 2f, 3f
> +1:	orr     x1, x1, TCR_EL1_IPS_40BIT
> +	msr     mair_el1, x0
> +	msr     tcr_el1, x1
> +	b	3f
> +2:	orr     x1, x1, TCR_EL2_IPS_40BIT
> +	msr     mair_el2, x0
> +	msr     tcr_el2, x1
> +3:
> +#endif
> +
> +	/* Relocate vBAR */
> +	adr	x0, vectors
> +	switch_el1_el2 x1, 1f, 2f, 3f
> +1:	msr	vbar_el1, x0
> +	b	3f
> +2:	msr	vbar_el2, x0
> +3:
> +
> +	ret
> +ENDPROC(c_runtime_cpu_setup)
> diff --git a/arch/arm/cpu/armv8/timer.c b/arch/arm/cpu/armv8/timer.c
> new file mode 100644
> index 0000000..2729e11
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/timer.c
> @@ -0,0 +1,97 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <common.h>
> +#include <div64.h>
> +
> +/*
> + * Generic Timer implementation of __udelay/get_timer/get_ticks/get_tbclk
> + * functions. If any other timers used, another implementation should be
> + * placed in platform code.
> + */
> +
> +static inline u64 tick_to_time(u64 tick)
> +{
> +	tick *= CONFIG_SYS_HZ;
> +	do_div(tick, CONFIG_SYS_CNTFRQ);

You can read the counter frequency at runtime. Then platforms which
don't start u-boot in EL3 don't need to set this define.

You might want to look at my common timer series. That will greatly
simplify this code.

> +	return tick;
> +}
> +
> +static inline u64 time_to_tick(u64 time)
> +{
> +	time *= CONFIG_SYS_CNTFRQ;
> +	do_div(time, CONFIG_SYS_HZ);
> +	return time;
> +}
> +
> +/*
> + * Generic timer implementation of get_tbclk()
> + */
> +ulong __get_tbclk(void)
> +{
> +	return CONFIG_SYS_HZ;

This should really return the actual counter frequency.

> +}
> +ulong get_tbclk(void)
> +	__attribute__((weak, alias("__get_tbclk")));

This is not needed.


> +
> +/*
> + * Generic timer implementation of get_timer()
> + */
> +ulong __get_timer(ulong base)
> +{
> +	u64 cval;
> +
> +	isb();
> +	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
> +
> +	tick_to_time(cval);
> +
> +	return tick_to_time(cval) - base;
> +}
> +ulong get_timer(ulong base)
> +	__attribute__((weak, alias("__get_timer")));

This is not needed.


> +
> +/*
> + * Generic timer implementation of get_ticks()
> + */
> +unsigned long long __get_ticks(void)
> +{
> +	return get_timer(0);
> +}
> +unsigned long long get_ticks(void)
> +	__attribute__((weak, alias("__get_ticks")));

This is not needed.


> +
> +/*
> + * Generic timer implementation of __udelay()
> + */
> +void ___udelay(ulong usec)
> +{
> +	unsigned long ticks, limit;
> +
> +	limit = get_ticks() + usec/1000;

This implementation is not ideal as it gives a udelay resolution of 1 msec.

> +
> +	do {
> +		ticks = get_ticks();
> +	} while (ticks < limit);
> +}
> +void __udelay(ulong usec)
> +	__attribute__((weak, alias("___udelay")));

This is not needed.

> diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
> new file mode 100644
> index 0000000..b6cc376
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/tlb.S
> @@ -0,0 +1,45 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +
> +/*
> + * void __asm_invalidate_tlb_all(void)
> + *
> + * invalidate all tlb entries.
> + */
> +ENTRY(__asm_invalidate_tlb_all)
> +	switch_el1_el2 x9, 1f, 2f, 3f
> +1:	tlbi	vmalle1
> +	dsb	sy
> +	isb
> +	b	3f
> +2:	tlbi	alle2
> +	dsb	sy
> +	isb
> +3:
> +	ret
> +ENDPROC(__asm_invalidate_tlb_all)
> diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
> new file mode 100644
> index 0000000..14842e3
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/u-boot.lds
> @@ -0,0 +1,83 @@
> +/*
> + * Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
> +OUTPUT_ARCH(aarch64)
> +ENTRY(_start)
> +SECTIONS
> +{
> +	. = 0x00000000;
> +
> +	. = ALIGN(8);
> +	.text :
> +	{
> +		*(.__image_copy_start)
> +		CPUDIR/start.o (.text*)
> +		*(.text*)
> +	}
> +
> +	. = ALIGN(8);
> +	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
> +
> +	. = ALIGN(8);
> +	.data : {
> +		*(.data*)
> +	}
> +
> +	. = ALIGN(8);
> +
> +	. = .;
> +
> +	. = ALIGN(8);
> +	.u_boot_list : {
> +		KEEP(*(SORT(.u_boot_list*)));
> +	}
> +
> +	. = ALIGN(8);
> +	.reloc : {
> +		__rel_got_start = .;
> +		*(.got)
> +		__rel_got_end = .;
> +	}
> +
> +	.image_copy_end :
> +	{
> +		*(.__image_copy_end)
> +	}
> +
> +	_end = .;
> +
> +	. = ALIGN(8);
> +	.bss : {
> +		__bss_start = .;
> +		*(.bss*)
> +		 . = ALIGN(8);
> +		__bss_end = .;
> +	}
> +
> +	/DISCARD/ : { *(.dynsym) }
> +	/DISCARD/ : { *(.dynstr*) }
> +	/DISCARD/ : { *(.dynamic*) }
> +	/DISCARD/ : { *(.plt*) }
> +	/DISCARD/ : { *(.interp*) }
> +	/DISCARD/ : { *(.gnu*) }
> +}
> diff --git a/arch/arm/include/asm/arch-armv8/gpio.h b/arch/arm/include/asm/arch-armv8/gpio.h
> new file mode 100644
> index 0000000..0fbbcaf
> --- /dev/null
> +++ b/arch/arm/include/asm/arch-armv8/gpio.h
> @@ -0,0 +1,26 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#ifndef _ASM_ARMV8_GPIO_H_
> +#define _ASM_ARMV8_GPIO_H_
> +
> +#endif	/* _ASM_ARMV8_GPIO_H_ */
> diff --git a/arch/arm/include/asm/arch-armv8/mmu.h b/arch/arm/include/asm/arch-armv8/mmu.h
> new file mode 100644
> index 0000000..87412fc
> --- /dev/null
> +++ b/arch/arm/include/asm/arch-armv8/mmu.h
> @@ -0,0 +1,117 @@
> +/*
> + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#ifndef _ASM_ARMV8_MMU_H_
> +#define _ASM_ARMV8_MMU_H_
> +
> +#ifdef __ASSEMBLY__
> +#define _AC(X, Y)	X
> +#else
> +#define _AC(X, Y)	(X##Y)
> +#endif
> +
> +#define UL(x)		_AC(x, UL)
> +
> +/***************************************************************/
> +/*
> + * The following definitions are related to each other, and should be
> + * calculated specifically.
> + */
> +#define VA_BITS			(39)
> +
> +/* PAGE_SHIFT determines the page size */
> +#undef  PAGE_SIZE
> +#define PAGE_SHIFT		16
> +#define PAGE_SIZE		(1 << PAGE_SHIFT)
> +#define PAGE_MASK		(~(PAGE_SIZE-1))
> +
> +/*
> + * section address mask and size definitions.
> + */
> +#define SECTION_SHIFT		29
> +#define SECTION_SIZE		(UL(1) << SECTION_SHIFT)
> +#define SECTION_MASK		(~(SECTION_SIZE-1))
> +/***************************************************************/
> +
> +/*
> + * Memory types available.
> + */
> +#define MT_DEVICE_nGnRnE	0
> +#define MT_DEVICE_nGnRE		1
> +#define MT_DEVICE_GRE		2
> +#define MT_NORMAL_NC		3
> +#define MT_NORMAL		4
> +
> +#define MAIR(attr, mt)		((attr) << ((mt) * 8))
> +
> +/*
> + * Hardware page table definitions.
> + *
> + * Level 2 descriptor (PMD).
> + */
> +#define PMD_TYPE_MASK		(3 << 0)
> +#define PMD_TYPE_FAULT		(0 << 0)
> +#define PMD_TYPE_TABLE		(3 << 0)
> +#define PMD_TYPE_SECT		(1 << 0)
> +
> +/*
> + * Section
> + */
> +#define PMD_SECT_S		(3 << 8)
> +#define PMD_SECT_AF		(1 << 10)
> +#define PMD_SECT_NG		(1 << 11)
> +#define PMD_SECT_PXN		(UL(1) << 53)
> +#define PMD_SECT_UXN		(UL(1) << 54)
> +
> +/*
> + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
> + */
> +#define PMD_ATTRINDX(t)		((t) << 2)
> +#define PMD_ATTRINDX_MASK	(7 << 2)
> +
> +/*
> + * TCR flags.
> + */
> +#define TCR_T0SZ(x)		((64 - (x)) << 0)
> +#define TCR_IRGN_NC		(0 << 8)
> +#define TCR_IRGN_WBWA		(1 << 8)
> +#define TCR_IRGN_WT		(2 << 8)
> +#define TCR_IRGN_WBnWA		(3 << 8)
> +#define TCR_IRGN_MASK		(3 << 8)
> +#define TCR_ORGN_NC		(0 << 10)
> +#define TCR_ORGN_WBWA		(1 << 10)
> +#define TCR_ORGN_WT		(2 << 10)
> +#define TCR_ORGN_WBnWA		(3 << 10)
> +#define TCR_ORGN_MASK		(3 << 10)
> +#define TCR_SHARED_NON		(0 << 12)
> +#define TCR_SHARED_OUTER	(1 << 12)
> +#define TCR_SHARED_INNER	(2 << 12)
> +#define TCR_TG0_4K		(0 << 14)
> +#define TCR_TG0_64K		(1 << 14)
> +#define TCR_TG0_16K		(2 << 14)
> +#define TCR_EL1_IPS_40BIT	(2 << 32)
> +#define TCR_EL2_IPS_40BIT	(2 << 16)
> +
> +/* PTWs cacheable, inner/outer WBWA not shareable */
> +#define TCR_FLAGS		(TCR_IRGN_WBWA | TCR_ORGN_WBWA)
> +
> +#endif /* _ASM_ARMV8_MMU_H_ */
> diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
> index c3489f1..7d3f9e4 100644
> --- a/arch/arm/include/asm/byteorder.h
> +++ b/arch/arm/include/asm/byteorder.h
> @@ -23,10 +23,22 @@
>  #  define __SWAB_64_THRU_32__
>  #endif
>  
> +#ifdef	CONFIG_ARMV8
> +
> +#ifdef __AARCH64EB__
> +#include <linux/byteorder/big_endian.h>
> +#else
> +#include <linux/byteorder/little_endian.h>
> +#endif
> +
> +#else	/* CONFIG_ARMV8 */
> +
>  #ifdef __ARMEB__
>  #include <linux/byteorder/big_endian.h>
>  #else
>  #include <linux/byteorder/little_endian.h>
>  #endif
>  
> +#endif	/* CONFIG_ARMV8 */
> +
>  #endif
> diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
> index 6d60a4a..49a8a88 100644
> --- a/arch/arm/include/asm/cache.h
> +++ b/arch/arm/include/asm/cache.h
> @@ -11,6 +11,8 @@
>  
>  #include <asm/system.h>
>  
> +#ifndef CONFIG_ARMV8
> +
>  /*
>   * Invalidate L2 Cache using co-proc instruction
>   */
> @@ -28,6 +30,9 @@ void l2_cache_disable(void);
>  void set_section_dcache(int section, enum dcache_option option);
>  
>  void dram_bank_mmu_setup(int bank);
> +
> +#endif
> +
>  /*
>   * The current upper bound for ARM L1 data cache line sizes is 64 bytes.  We
>   * use that value for aligning DMA buffers unless the board config has specified
> diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
> index 99b703e..30f008e 100644
> --- a/arch/arm/include/asm/config.h
> +++ b/arch/arm/include/asm/config.h
> @@ -9,4 +9,14 @@
>  
>  #define CONFIG_LMB
>  #define CONFIG_SYS_BOOT_RAMDISK_HIGH
> +
> +#ifdef CONFIG_ARMV8
> +/*
> + * Currently, GOT is used to relocate u-boot and
> + * configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
> + */
> +#define CONFIG_NEEDS_MANUAL_RELOC
> +#define CONFIG_PHYS_64BIT
> +#endif
> +
>  #endif
> diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
> index 79a9597..b30dd5e 100644
> --- a/arch/arm/include/asm/global_data.h
> +++ b/arch/arm/include/asm/global_data.h
> @@ -47,6 +47,10 @@ struct arch_global_data {
>  
>  #include <asm-generic/global_data.h>
>  
> -#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r8")
> +#ifdef CONFIG_ARMV8
> +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("x18")
> +#else
> +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("r8")
> +#endif
>  
>  #endif /* __ASM_GBL_DATA_H */
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index 1fbc531..6a1f05a 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -75,42 +75,45 @@ static inline phys_addr_t virt_to_phys(void * vaddr)
>  #define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
>  #define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
>  
> -extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen)
> +extern inline void __raw_writesb(unsigned long addr, const void *data,
> +				 int bytelen)
>  {
>  	uint8_t *buf = (uint8_t *)data;
>  	while(bytelen--)
>  		__arch_putb(*buf++, addr);
>  }
>  
> -extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen)
> +extern inline void __raw_writesw(unsigned long addr, const void *data,
> +				 int wordlen)
>  {
>  	uint16_t *buf = (uint16_t *)data;
>  	while(wordlen--)
>  		__arch_putw(*buf++, addr);
>  }
>  
> -extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen)
> +extern inline void __raw_writesl(unsigned long addr, const void *data,
> +				 int longlen)
>  {
>  	uint32_t *buf = (uint32_t *)data;
>  	while(longlen--)
>  		__arch_putl(*buf++, addr);
>  }
>  
> -extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
> +extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
>  {
>  	uint8_t *buf = (uint8_t *)data;
>  	while(bytelen--)
>  		*buf++ = __arch_getb(addr);
>  }
>  
> -extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
> +extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
>  {
>  	uint16_t *buf = (uint16_t *)data;
>  	while(wordlen--)
>  		*buf++ = __arch_getw(addr);
>  }
>  
> -extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
> +extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
>  {
>  	uint32_t *buf = (uint32_t *)data;
>  	while(longlen--)
> diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
> index ff13f36..40fa300 100644
> --- a/arch/arm/include/asm/macro.h
> +++ b/arch/arm/include/asm/macro.h
> @@ -54,5 +54,39 @@
>  	bcs	1b
>  .endm
>  
> +#ifdef CONFIG_ARMV8
> +/*
> + * Register aliases.
> + */
> +lr	.req	x30
> +
> +/*
> + * Store register pairs to stack.
> + */
> +.macro	push, xreg1, xreg2
> +	stp	\xreg1, \xreg2, [sp, #-16]!
> +.endm
> +
> +/*
> + * Pop register pairs from stack.
> + */
> +.macro	pop, xreg1, xreg2
> +	ldp	\xreg1, \xreg2, [sp], #16
> +.endm
> +
> +/*
> + * Branch according to exception level
> + */
> +.macro	switch_el1_el2, xreg, el1_label, el2_label, fail_label
> +	mrs	\xreg, CurrentEL
> +	cmp	\xreg, 0x4
> +	b.eq	\el1_label
> +	cmp	\xreg, 0x8
> +	b.eq	\el2_label
> +	b	\fail_label
> +.endm
> +
> +#endif /* CONFIG_ARMV8 */
> +
>  #endif /* __ASSEMBLY__ */
>  #endif /* __ASM_ARM_MACRO_H__ */
> diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
> index c412486..b2f90e7 100644
> --- a/arch/arm/include/asm/posix_types.h
> +++ b/arch/arm/include/asm/posix_types.h
> @@ -13,6 +13,8 @@
>  #ifndef __ARCH_ARM_POSIX_TYPES_H
>  #define __ARCH_ARM_POSIX_TYPES_H
>  
> +#include <config.h>
> +
>  /*
>   * This file is generally used by user-level software, so you need to
>   * be a little careful about namespace pollution etc.  Also, we cannot
> @@ -28,6 +30,16 @@ typedef int			__kernel_pid_t;
>  typedef unsigned short		__kernel_ipc_pid_t;
>  typedef unsigned short		__kernel_uid_t;
>  typedef unsigned short		__kernel_gid_t;
> +
> +#ifdef	CONFIG_ARMV8
> +typedef unsigned long		__kernel_size_t;
> +typedef long			__kernel_ssize_t;
> +typedef long			__kernel_ptrdiff_t;
> +typedef long			__kernel_time_t;
> +typedef long			__kernel_suseconds_t;
> +typedef long			__kernel_clock_t;
> +typedef long			__kernel_daddr_t;
> +#else	/* CONFIG_ARMV8 */
>  typedef unsigned int		__kernel_size_t;
>  typedef int			__kernel_ssize_t;
>  typedef int			__kernel_ptrdiff_t;
> @@ -35,6 +47,8 @@ typedef long			__kernel_time_t;
>  typedef long			__kernel_suseconds_t;
>  typedef long			__kernel_clock_t;
>  typedef int			__kernel_daddr_t;
> +#endif	/* CONFIG_ARMV8 */
> +
>  typedef char *			__kernel_caddr_t;
>  typedef unsigned short		__kernel_uid16_t;
>  typedef unsigned short		__kernel_gid16_t;
> @@ -44,6 +58,9 @@ typedef unsigned int		__kernel_gid32_t;
>  typedef unsigned short		__kernel_old_uid_t;
>  typedef unsigned short		__kernel_old_gid_t;
>  
> +typedef __kernel_uid_t		__kernel_old_uid_t;
> +typedef __kernel_gid_t		__kernel_old_gid_t;
> +
>  #ifdef __GNUC__
>  typedef long long		__kernel_loff_t;
>  #endif
> diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h
> index 79cc644..d0cbb06 100644
> --- a/arch/arm/include/asm/proc-armv/ptrace.h
> +++ b/arch/arm/include/asm/proc-armv/ptrace.h
> @@ -12,6 +12,41 @@
>  
>  #include <linux/config.h>
>  
> +#ifdef CONFIG_ARMV8
> +
> +#define PCMASK		0
> +
> +#define S_X0		(0)
> +#define S_X1		(8)
> +#define S_X2		(16)
> +#define S_X3		(24)
> +#define S_X4		(32)
> +#define S_X5		(40)
> +#define S_X6		(48)
> +#define S_X7		(56)
> +#define S_LR		(240)
> +#define S_SP		(248)
> +#define S_PC		(256)
> +#define S_PSTATE	(264)
> +#define S_FRAME_SIZE	(272)
> +
> +#ifndef __ASSEMBLY__
> +
> +/*
> + * This struct defines the way the registers are stored on the stack during an
> + * exception.
> + */
> +struct pt_regs {
> +	unsigned long regs[31];
> +	unsigned long sp;
> +	unsigned long pc;
> +	unsigned long pstate;
> +};
> +
> +#endif	/* __ASSEMBLY__ */
> +
> +#else	/* CONFIG_ARMV8 */
> +
>  #define USR26_MODE	0x00
>  #define FIQ26_MODE	0x01
>  #define IRQ26_MODE	0x02
> @@ -106,4 +141,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
>  
>  #endif	/* __ASSEMBLY__ */
>  
> +#endif	/* CONFIG_ARMV8 */
> +
>  #endif
> diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h
> index b4cfa68..17096fc 100644
> --- a/arch/arm/include/asm/proc-armv/system.h
> +++ b/arch/arm/include/asm/proc-armv/system.h
> @@ -15,6 +15,60 @@
>  /*
>   * Save the current interrupt enable state & disable IRQs
>   */
> +#ifdef CONFIG_ARMV8
> +
> +/*
> + * Save the current interrupt enable state
> + * and disable IRQs/FIQs
> + */
> +#define local_irq_save(flags)					\
> +	({							\
> +	asm volatile(						\
> +	"mrs	%0, daif"					\
> +	"msr	daifset, #3"					\
> +	: "=r" (flags)						\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +/*
> + * restore saved IRQ & FIQ state
> + */
> +#define local_irq_restore(flags)				\
> +	({							\
> +	asm volatile(						\
> +	"msr	daif, %0"					\
> +	:							\
> +	: "r" (flags)						\
> +	: "memory");						\
> +	})
> +
> +/*
> + * Enable IRQs/FIQs
> + */
> +#define local_irq_enable()					\
> +	({							\
> +	asm volatile(						\
> +	"msr	daifclr, #3"					\
> +	:							\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +/*
> + * Disable IRQs/FIQs
> + */
> +#define local_irq_disable()					\
> +	({							\
> +	asm volatile(						\
> +	"msr	daifset, #3"					\
> +	:							\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +#else	/* CONFIG_ARMV8 */
> +
>  #define local_irq_save(x)					\
>  	({							\
>  		unsigned long temp;				\
> @@ -109,7 +163,10 @@
>  	: "r" (x)						\
>  	: "memory")
>  
> -#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
> +#endif	/* CONFIG_ARMV8 */
> +
> +#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \
> +	defined(CONFIG_ARMV8)
>  /*
>   * On the StrongARM, "swp" is terminally broken since it bypasses the
>   * cache totally.  This means that the cache becomes inconsistent, and,
> diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> index 760345f..0bc2e0f 100644
> --- a/arch/arm/include/asm/system.h
> +++ b/arch/arm/include/asm/system.h
> @@ -1,6 +1,80 @@
>  #ifndef __ASM_ARM_SYSTEM_H
>  #define __ASM_ARM_SYSTEM_H
>  
> +#ifdef CONFIG_ARMV8
> +
> +/*
> + * SCTLR_EL2 bits definitions
> + */
> +#define CR_M		(1 << 0)	/* MMU enable			*/
> +#define CR_A		(1 << 1)	/* Alignment abort enable	*/
> +#define CR_C		(1 << 2)	/* Dcache enable		*/
> +#define CR_SA		(1 << 3)	/* Stack Alignment Check Enable	*/
> +#define CR_I		(1 << 12)	/* Icache enable		*/
> +#define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
> +#define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
> +
> +#define PGTABLE_SIZE	(0x10000)
> +
> +#ifndef __ASSEMBLY__
> +
> +#define isb() __asm__ __volatile__ ("isb" : : : "memory")
> +
> +#define wfi() __asm__ __volatile__ ("wfi" : : : "memory")
> +
> +static inline unsigned int curent_el(void)
> +{
> +	unsigned int el;
> +	asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
> +	return el >> 2;
> +}
> +
> +static inline unsigned int get_sctlr(void)
> +{
> +	unsigned int el, val;
> +
> +	el = curent_el();
> +	if (el == 1)
> +		asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
> +	else if (el == 2)
> +		asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
> +	else
> +		panic("Not Supported Exception Level");
> +
> +	return val;
> +}
> +
> +static inline void set_sctlr(unsigned int val)
> +{
> +	unsigned int el;
> +
> +	el = curent_el();
> +	if (el == 1)
> +		asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
> +	else if (el == 2)
> +		asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
> +	else
> +		panic("Not Supported Exception Level");
> +
> +	asm volatile("isb");
> +}
> +
> +void __asm_flush_dcache_all(void);
> +void __asm_flush_dcache_range(u64 start, u64 end);
> +void __asm_invalidate_tlb_all(void);
> +void __asm_invalidate_icache_all(void);
> +
> +void v8_outer_cache_enable(void);
> +void v8_outer_cache_disable(void);
> +void v8_outer_cache_flush_all(void);
> +void v8_outer_cache_inval_all(void);
> +void v8_outer_cache_flush_range(u64 start, u64 end);
> +void v8_outer_cache_inval_range(u64 start, u64 end);
> +
> +#endif	/* __ASSEMBLY__ */
> +
> +#else /* CONFIG_ARMV8 */
> +
>  #ifdef __KERNEL__
>  
>  #define CPU_ARCH_UNKNOWN	0
> @@ -45,6 +119,8 @@
>  #define CR_AFE	(1 << 29)	/* Access flag enable			*/
>  #define CR_TE	(1 << 30)	/* Thumb exception enable		*/
>  
> +#define PGTABLE_SIZE		(4096 * 4)
> +
>  /*
>   * This is used to ensure the compiler did actually allocate the register we
>   * asked it for some inline assembly sequences.  Apparently we can't trust
> @@ -132,4 +208,6 @@ void mmu_page_table_flush(unsigned long start, unsigned long stop);
>  
>  #endif /* __KERNEL__ */
>  
> +#endif /* CONFIG_ARMV8 */
> +
>  #endif
> diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
> index 71dc049..b7794f3 100644
> --- a/arch/arm/include/asm/types.h
> +++ b/arch/arm/include/asm/types.h
> @@ -39,7 +39,11 @@ typedef unsigned int u32;
>  typedef signed long long s64;
>  typedef unsigned long long u64;
>  
> +#ifdef	CONFIG_ARMV8
> +#define BITS_PER_LONG 64
> +#else	/* CONFIG_ARMV8 */
>  #define BITS_PER_LONG 32
> +#endif	/* CONFIG_ARMV8 */
>  
>  /* Dma addresses are 32-bits wide.  */
>  
> diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
> index 2b5fce8..3ef5538 100644
> --- a/arch/arm/include/asm/u-boot.h
> +++ b/arch/arm/include/asm/u-boot.h
> @@ -44,6 +44,10 @@ typedef struct bd_info {
>  #endif /* !CONFIG_SYS_GENERIC_BOARD */
>  
>  /* For image.h:image_check_target_arch() */
> +#ifndef CONFIG_ARMV8
>  #define IH_ARCH_DEFAULT IH_ARCH_ARM
> +#else
> +#define IH_ARCH_DEFAULT IH_ARCH_ARM64
> +#endif
>  
>  #endif	/* _U_BOOT_H_ */
> diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
> index 44593a8..0a228fb 100644
> --- a/arch/arm/include/asm/unaligned.h
> +++ b/arch/arm/include/asm/unaligned.h
> @@ -8,7 +8,7 @@
>  /*
>   * Select endianness
>   */
> -#ifndef __ARMEB__
> +#if __BYTE_ORDER == __LITTLE_ENDIAN
>  #define get_unaligned	__get_unaligned_le
>  #define put_unaligned	__put_unaligned_le
>  #else
> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index 4e78723..86b19e2 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -10,6 +10,7 @@ include $(TOPDIR)/config.mk
>  LIB	= $(obj)lib$(ARCH).o
>  LIBGCC	= $(obj)libgcc.o
>  
> +ifndef CONFIG_ARMV8
>  GLSOBJS	+= _ashldi3.o
>  GLSOBJS	+= _ashrdi3.o
>  GLSOBJS	+= _divsi3.o
> @@ -21,9 +22,12 @@ GLSOBJS	+= _umodsi3.o
>  GLCOBJS	+= div0.o
>  
>  SOBJS-y += crt0.o
> +endif
>  
>  ifndef CONFIG_SPL_BUILD
> +ifndef CONFIG_ARMV8
>  SOBJS-y += relocate.o
> +endif
>  ifndef CONFIG_SYS_GENERIC_BOARD
>  COBJS-y	+= board.o
>  endif
> @@ -38,11 +42,15 @@ else
>  COBJS-$(CONFIG_SPL_FRAMEWORK) += spl.o
>  endif
>  
> +ifndef CONFIG_ARMV8

But you do have an interrupts.c file in arch/arm/cpu/armv8 — why is interrupts.o excluded from the build here?

>  COBJS-y	+= interrupts.o
> +endif
>  COBJS-y	+= reset.o
>  
>  COBJS-y	+= cache.o
> +ifndef CONFIG_ARMV8
>  COBJS-y	+= cache-cp15.o
> +endif
>  
>  SRCS	:= $(GLSOBJS:.o=.S) $(GLCOBJS:.o=.c) \
>  	   $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
> diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
> index 34f50b0..61a87a8 100644
> --- a/arch/arm/lib/board.c
> +++ b/arch/arm/lib/board.c
> @@ -344,7 +344,7 @@ void board_init_f(ulong bootflag)
>  
>  #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
>  	/* reserve TLB table */
> -	gd->arch.tlb_size = 4096 * 4;
> +	gd->arch.tlb_size = PGTABLE_SIZE;
>  	addr -= gd->arch.tlb_size;
>  
>  	/* round down to next 64 kB limit */
> @@ -419,6 +419,7 @@ void board_init_f(ulong bootflag)
>  	}
>  #endif
>  
> +#ifndef CONFIG_ARMV8
>  	/* setup stackpointer for exeptions */
>  	gd->irq_sp = addr_sp;
>  #ifdef CONFIG_USE_IRQ
> @@ -431,6 +432,10 @@ void board_init_f(ulong bootflag)
>  
>  	/* 8-byte alignment for ABI compliance */
>  	addr_sp &= ~0x07;
> +#else	/* CONFIG_ARMV8 */
> +	/* 16-byte alignment for ABI compliance */
> +	addr_sp &= ~0x0f;

16-byte alignment will work for all of ARM. You don't really need an
ifdef here.

> +#endif	/* CONFIG_ARMV8 */
>  #else
>  	addr_sp += 128;	/* leave 32 words for abort-stack   */
>  	gd->irq_sp = addr_sp;
> @@ -523,6 +528,15 @@ void board_init_r(gd_t *id, ulong dest_addr)
>  
>  	debug("monitor flash len: %08lX\n", monitor_flash_len);
>  	board_init();	/* Setup chipselects */
> +
> +#ifdef CONFIG_NEEDS_MANUAL_RELOC
> +	/*
> +	 * We have to relocate the command table manually
> +	 */
> +	fixup_cmdtable(ll_entry_start(cmd_tbl_t, cmd),
> +			ll_entry_count(cmd_tbl_t, cmd));
> +#endif /* CONFIG_NEEDS_MANUAL_RELOC */
> +
>  	/*
>  	 * TODO: printing of the clock inforamtion of the board is now
>  	 * implemented as part of bdinfo command. Currently only support for
> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
> index eefb456..149cb99 100644
> --- a/arch/arm/lib/bootm.c
> +++ b/arch/arm/lib/bootm.c
> @@ -222,6 +222,21 @@ static void boot_prep_linux(bootm_headers_t *images)
>  /* Subcommand: GO */
>  static void boot_jump_linux(bootm_headers_t *images, int flag)
>  {
> +#ifdef CONFIG_ARMV8
> +	void (*kernel_entry)(void *fdt_addr);
> +	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
> +
> +	kernel_entry = (void (*)(void *fdt_addr))images->ep;
> +
> +	debug("## Transferring control to Linux (at address %lx)...\n",
> +	      (ulong) kernel_entry);
> +	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
> +
> +	announce_and_cleanup(fake);
> +
> +	if (!fake)
> +		kernel_entry(images->ft_addr);
> +#else

This function could be refactored to avoid duplicating most of it.

>  	unsigned long machid = gd->bd->bi_arch_number;
>  	char *s;
>  	void (*kernel_entry)(int zero, int arch, uint params);
> @@ -236,8 +251,8 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
>  		printf("Using machid 0x%lx from environment\n", machid);
>  	}
>  
> -	debug("## Transferring control to Linux (at address %08lx)" \
> -		"...\n", (ulong) kernel_entry);
> +	debug("## Transferring control to Linux (at address %08lx)...\n",
> +	      (ulong) kernel_entry);

This is an unrelated change.

>  	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
>  	announce_and_cleanup(fake);
>  
> @@ -248,6 +263,7 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
>  
>  	if (!fake)
>  		kernel_entry(0, machid, r2);
> +#endif
>  }
>  
>  /* Main Entry point for arm bootm implementation
> diff --git a/common/image.c b/common/image.c
> index 56a5a62..7182549 100644
> --- a/common/image.c
> +++ b/common/image.c
> @@ -81,6 +81,7 @@ static const table_entry_t uimage_arch[] = {
>  	{	IH_ARCH_NDS32,		"nds32",	"NDS32",	},
>  	{	IH_ARCH_OPENRISC,	"or1k",		"OpenRISC 1000",},
>  	{	IH_ARCH_SANDBOX,	"sandbox",	"Sandbox",	},
> +	{	IH_ARCH_ARM64,		"arm64",	"AArch64",	},
>  	{	-1,			"",		"",		},
>  };
>  
> diff --git a/doc/README.armv8 b/doc/README.armv8
> new file mode 100644
> index 0000000..d348250
> --- /dev/null
> +++ b/doc/README.armv8
> @@ -0,0 +1,10 @@
> +Notes:
> +
> +1. Currenly, u-boot could be running at EL1 or EL2.
> +
> +2. GOT is used to relocate u-boot and CONFIG_NEEDS_MANUAL_RELOC is needed.
> +
> +3. Fdt should be placed in the first 512 megabytes from the start of the kernel image.
> +   So, fdt_high should be defined specially. Please reference linux/Documentation/arm64/booting.txt.

This doesn't sound correct. This should be "512M from the start of RAM."
The kernel image is at the start of RAM, but you do not necessarily load
your kernel image in U-Boot to that location. A zImage, for example, is
loaded somewhere outside the uncompressed kernel's final location.

Rob

> +
> +4. Generic board is supported.
> diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
> index 8fb1765..a58147c 100644
> --- a/examples/standalone/stubs.c
> +++ b/examples/standalone/stubs.c
> @@ -39,6 +39,20 @@ gd_t *global_data;
>  "	bctr\n"				\
>  	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
>  #elif defined(CONFIG_ARM)
> +#ifdef CONFIG_ARMV8
> +/*
> + * x18 holds the pointer to the global_data, x9 is a call-clobbered
> + * register
> + */
> +#define EXPORT_FUNC(x) \
> +	asm volatile (			\
> +"	.globl " #x "\n"		\
> +#x ":\n"				\
> +"	ldr	x9, [x18, %0]\n"		\
> +"	ldr	x9, [x9, %1]\n"		\
> +"	br	x9\n"		\
> +	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9");
> +#else
>  /*
>   * r8 holds the pointer to the global_data, ip is a call-clobbered
>   * register
> @@ -50,6 +64,7 @@ gd_t *global_data;
>  "	ldr	ip, [r8, %0]\n"		\
>  "	ldr	pc, [ip, %1]\n"		\
>  	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
> +#endif
>  #elif defined(CONFIG_MIPS)
>  /*
>   * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
> diff --git a/include/image.h b/include/image.h
> index f93a393..12262d7 100644
> --- a/include/image.h
> +++ b/include/image.h
> @@ -156,6 +156,7 @@ struct lmb;
>  #define IH_ARCH_SANDBOX		19	/* Sandbox architecture (test only) */
>  #define IH_ARCH_NDS32	        20	/* ANDES Technology - NDS32  */
>  #define IH_ARCH_OPENRISC        21	/* OpenRISC 1000  */
> +#define IH_ARCH_ARM64		22	/* ARM64	*/
>  
>  /*
>   * Image Types
>
fenghua@phytium.com.cn - Sept. 11, 2013, 1:39 p.m.
Hi Rob,
    Thank you for reviewing this patch.

> -----原始邮件-----
> 发件人: "Rob Herring" <robherring2@gmail.com>
> 发送时间: 2013年9月11日 星期三
> 收件人: fenghua@phytium.com.cn
> 抄送: u-boot@lists.denx.de, trini@ti.com
> 主题: Re: [U-Boot] [PATCH v7 1/5] core support of arm64
> 
> On 09/10/2013 03:12 AM, fenghua@phytium.com.cn wrote:
> > From: David Feng <fenghua@phytium.com.cn>
> > 
> > Signed-off-by: David Feng <fenghua@phytium.com.cn>
> > ---
> >  MAINTAINERS                             |    4 +
> >  arch/arm/config.mk                      |    4 +
> >  arch/arm/cpu/armv8/Makefile             |   56 +++++++
> >  arch/arm/cpu/armv8/cache.S              |  145 ++++++++++++++++
> >  arch/arm/cpu/armv8/cache_v8.c           |  275 +++++++++++++++++++++++++++++++
> >  arch/arm/cpu/armv8/config.mk            |   31 ++++
> >  arch/arm/cpu/armv8/cpu.c                |   68 ++++++++
> >  arch/arm/cpu/armv8/crt0.S               |  130 +++++++++++++++
> >  arch/arm/cpu/armv8/exceptions.S         |  173 +++++++++++++++++++
> >  arch/arm/cpu/armv8/interrupts.c         |  158 ++++++++++++++++++
> >  arch/arm/cpu/armv8/relocate.S           |   73 ++++++++
> >  arch/arm/cpu/armv8/start.S              |  253 ++++++++++++++++++++++++++++
> >  arch/arm/cpu/armv8/timer.c              |   97 +++++++++++
> >  arch/arm/cpu/armv8/tlb.S                |   45 +++++
> >  arch/arm/cpu/armv8/u-boot.lds           |   83 ++++++++++
> >  arch/arm/include/asm/arch-armv8/gpio.h  |   26 +++
> >  arch/arm/include/asm/arch-armv8/mmu.h   |  117 +++++++++++++
> >  arch/arm/include/asm/byteorder.h        |   12 ++
> >  arch/arm/include/asm/cache.h            |    5 +
> >  arch/arm/include/asm/config.h           |   10 ++
> >  arch/arm/include/asm/global_data.h      |    6 +-
> >  arch/arm/include/asm/io.h               |   15 +-
> >  arch/arm/include/asm/macro.h            |   34 ++++
> >  arch/arm/include/asm/posix_types.h      |   17 ++
> >  arch/arm/include/asm/proc-armv/ptrace.h |   37 +++++
> >  arch/arm/include/asm/proc-armv/system.h |   59 ++++++-
> >  arch/arm/include/asm/system.h           |   78 +++++++++
> >  arch/arm/include/asm/types.h            |    4 +
> >  arch/arm/include/asm/u-boot.h           |    4 +
> >  arch/arm/include/asm/unaligned.h        |    2 +-
> >  arch/arm/lib/Makefile                   |    8 +
> >  arch/arm/lib/board.c                    |   16 +-
> >  arch/arm/lib/bootm.c                    |   20 ++-
> >  common/image.c                          |    1 +
> >  doc/README.armv8                        |   10 ++
> >  examples/standalone/stubs.c             |   15 ++
> >  include/image.h                         |    1 +
> >  37 files changed, 2080 insertions(+), 12 deletions(-)
> 
> This patch is a bit large. It would be better to split into preparation
> patches and one to add armv8.
> 
> >  create mode 100644 arch/arm/cpu/armv8/Makefile
> >  create mode 100644 arch/arm/cpu/armv8/cache.S
> >  create mode 100644 arch/arm/cpu/armv8/cache_v8.c
> >  create mode 100644 arch/arm/cpu/armv8/config.mk
> >  create mode 100644 arch/arm/cpu/armv8/cpu.c
> >  create mode 100644 arch/arm/cpu/armv8/crt0.S
> >  create mode 100644 arch/arm/cpu/armv8/exceptions.S
> >  create mode 100644 arch/arm/cpu/armv8/interrupts.c
> >  create mode 100644 arch/arm/cpu/armv8/relocate.S
> >  create mode 100644 arch/arm/cpu/armv8/start.S
> >  create mode 100644 arch/arm/cpu/armv8/timer.c
> >  create mode 100644 arch/arm/cpu/armv8/tlb.S
> >  create mode 100644 arch/arm/cpu/armv8/u-boot.lds
> >  create mode 100644 arch/arm/include/asm/arch-armv8/gpio.h
> >  create mode 100644 arch/arm/include/asm/arch-armv8/mmu.h
> >  create mode 100644 doc/README.armv8
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 6e50fc4..d142307 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -1095,6 +1095,10 @@ Sergey Yanovich <ynvich@gmail.com>
> >  
> >  	lp8x4x		xscale/pxa
> >  
> > +David Feng <fenghua@phytium.com.cn>
> > +
> > +	vexpress_aemv8a		ARM ARMV8 (Quad Core)
> > +
> 
> This belongs in the patch adding the board support.
> 
Yes, I'll move it.

> >  -------------------------------------------------------------------------
> >  
> >  Unknown / orphaned boards:
> > diff --git a/arch/arm/config.mk b/arch/arm/config.mk
> > index ce3903b..f1c6a7b 100644
> > --- a/arch/arm/config.mk
> > +++ b/arch/arm/config.mk
> > @@ -74,7 +74,9 @@ endif
> >  endif
> >  
> >  # needed for relocation
> > +ifndef CONFIG_ARMV8
> >  LDFLAGS_u-boot += -pie
> > +endif
> >  
> >  #
> >  # FIXME: binutils versions < 2.22 have a bug in the assembler where
> > @@ -95,6 +97,8 @@ endif
> >  endif
> >  
> >  # check that only R_ARM_RELATIVE relocations are generated
> > +ifndef CONFIG_ARMV8
> >  ifneq ($(CONFIG_SPL_BUILD),y)
> >  ALL-y	+= checkarmreloc
> >  endif
> > +endif
> > diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> > new file mode 100644
> > index 0000000..55fd365
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/Makefile
> > @@ -0,0 +1,56 @@
> > +#
> > +# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
> > +#
> > +# See file CREDITS for list of people who contributed to this
> > +# project.
> > +#
> > +# This program is free software; you can redistribute it and/or
> > +# modify it under the terms of the GNU General Public License as
> > +# published by the Free Software Foundatio; either version 2 of
> > +# the License, or (at your option) any later version.
> > +#
> > +# This program is distributed in the hope that it will be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program; if not, write to the Free Software
> > +# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > +# MA 02111-1307 USA
> 
> Shouldn't this and several other places use the new license tag.
> 
Yes, I'll modify related files in the next version.

> > +#
> > +
> > +include $(TOPDIR)/config.mk
> > +
> > +LIB	= $(obj)lib$(CPU).o
> > +
> > +START	:= start.o
> > +
> > +COBJS	+= cpu.o
> > +COBJS	+= timer.o
> > +COBJS	+= cache_v8.o
> > +COBJS	+= interrupts.o
> > +
> > +SOBJS	+= crt0.o
> > +SOBJS	+= relocate.o
> > +SOBJS	+= exceptions.o
> > +SOBJS	+= cache.o
> > +SOBJS	+= tlb.o
> > +
> > +SRCS	:= $(START:.o=.S) $(COBJS:.o=.c)
> > +OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
> > +START	:= $(addprefix $(obj),$(START))
> > +
> > +all:	$(obj).depend $(START) $(LIB)
> > +
> > +$(LIB):	$(OBJS)
> > +	$(call cmd_link_o_target, $(OBJS))
> > +
> > +#########################################################################
> > +
> > +# defines $(obj).depend target
> > +include $(SRCTREE)/rules.mk
> > +
> > +sinclude $(obj).depend
> > +
> > +#########################################################################
> > diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
> > new file mode 100644
> > index 0000000..050c1c0
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/cache.S
> > @@ -0,0 +1,145 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <asm-offsets.h>
> > +#include <config.h>
> > +#include <version.h>
> > +#include <asm/macro.h>
> > +#include <linux/linkage.h>
> > +
> > +/*
> > + * void __asm_flush_dcache_level(level)
> > + *
> > + * clean and invalidate one level cache.
> > + *
> > + * x0: cache level
> > + * x1~x9: clobbered
> > + */
> > +ENTRY(__asm_flush_dcache_level)
> > +	lsl	x1, x0, #1
> > +	msr	csselr_el1, x1		/* select cache level */
> > +	isb				/* isb to sych the new cssr & csidr */
> > +	mrs	x6, ccsidr_el1		/* read the new ccsidr */
> > +	and	x2, x6, #7		/* x2 <- length of the cache lines */
> > +	add	x2, x2, #4		/* add 4 (line length offset) */
> > +	mov	x3, #0x3ff
> > +	and	x3, x3, x6, lsr #3	/* x3 <- maximum number of way size */
> > +	clz	w5, w3			/* bit position of way size */
> > +	mov	x4, #0x7fff
> > +	and	x4, x4, x1, lsr #13	/* x4 <- max number of the set size */
> > +	/* x1 <- cache level << 1 */
> > +	/* x2 <- line length offset */
> > +	/* x3 <- number of cache ways */
> > +	/* x4 <- number of cache sets */
> > +	/* x5 <- bit position of way size */
> > +
> > +loop_set:
> > +	mov	x6, x3			/* create working copy of way size */
> > +loop_way:
> > +	lsl	x7, x6, x5
> > +	orr	x9, x0, x7		/* map way and level to cisw value */
> > +	lsl	x7, x4, x2
> > +	orr	x9, x9, x7		/* map set number to cisw value */
> > +	dc	cisw, x9		/* clean & invalidate by set/way */
> > +	subs	x6, x6, #1		/* decrement the way */
> > +	b.ge	loop_way
> > +	subs	x4, x4, #1		/* decrement the set */
> > +	b.ge	loop_set
> > +
> > +	ret
> > +ENDPROC(__asm_flush_dcache_level)
> > +
> > +/*
> > + * void __asm_flush_dcache_all(void)
> > + *
> > + * clean and invalidate all data cache by SET/WAY.
> > + */
> > +ENTRY(__asm_flush_dcache_all)
> > +	dsb	sy
> > +	mov	x15, lr
> > +	mrs	x10, clidr_el1		/* read clidr */
> > +	lsr	x11, x10, #24
> > +	and	x11, x11, #0x7		/* x11 <- loc */
> > +	cbz	x11, finished		/* if loc is 0, no need to clean */
> > +	mov	x0, #0			/* start flush at cache level 0 */
> > +	/* x0  <- cache level */
> > +	/* x10 <- clidr_el1 */
> > +	/* x11 <- loc */
> > +
> > +loop_level:
> > +	lsl	x1, x0, #1
> > +	add	x1, x1, x0		/* x0 <- 3x cache level */
> > +	lsr	x1, x10, x1
> > +	and	x1, x1, #7		/* x1 <- cache type */
> > +	cmp	x1, #2
> > +	b.lt	skip			/* skip if no cache or icache */
> > +	bl	__asm_flush_dcache_level
> > +skip:
> > +	add	x0, x0, #1		/* increment cache level */
> > +	cmp	x11, x0
> > +	b.gt	loop_level
> > +
> > +finished:
> > +	mov	x0, #0
> > +	msr	csselr_el1, x0		/* swith back to cache level 0 */
> > +	dsb	sy
> > +	isb
> > +	mov	lr, x15
> > +	ret
> > +ENDPROC(__asm_flush_dcache_all)
> > +
> > +/*
> > + * void __asm_flush_dcache_range(start, end)
> > + *
> > + * clean & invalidate data cache in the range
> > + *
> > + * x0: start address
> > + * x1: end address
> > + */
> > +ENTRY(__asm_flush_dcache_range)
> > +	mrs	x3, ctr_el0		/* read CTR */
> > +	lsr	x3, x3, #16
> > +	and	x3, x3, #0xf		/* cache line size encoding */
> > +	mov	x2, #4			/* bytes per word */
> > +	lsl	x2, x2, x3		/* actual cache line size */
> > +
> > +	/* x2 <- minimal cache line size in cache system */
> > +	sub	x3, x2, #1
> > +	bic	x0, x0, x3
> > +1:      dc	civac, x0		/* clean & invalidate D/unified line */
> > +	add	x0, x0, x2
> > +	cmp	x0, x1
> > +	b.lo	1b
> > +	dsb	sy
> > +	ret
> > +ENDPROC(__asm_flush_dcache_range)
> > +
> > +/*
> > + * void __asm_invalidate_icache_all(void)
> > + *
> > + * invalidate all tlb entries.
> > + */
> > +ENTRY(__asm_invalidate_icache_all)
> > +	ic	ialluis
> > +	isb	sy
> > +	ret
> > +ENDPROC(__asm_invalidate_icache_all)
> > diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
> > new file mode 100644
> > index 0000000..56a1489
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/cache_v8.c
> > @@ -0,0 +1,275 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <common.h>
> > +#include <asm/system.h>
> > +#include <asm/arch/mmu.h>
> > +
> > +DECLARE_GLOBAL_DATA_PTR;
> > +
> > +#ifndef CONFIG_SYS_DCACHE_OFF
> > +
> > +static void set_pgtable_section(u64 section, u64 memory_type)
> > +{
> > +	u64 *page_table = (u64 *)gd->arch.tlb_addr;
> > +	u64 value;
> > +
> > +	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
> > +	value |= PMD_ATTRINDX(memory_type);
> > +	page_table[section] = value;
> > +}
> > +
> > +/* to activate the MMU we need to set up virtual memory */
> > +static inline void mmu_setup(void)
> > +{
> > +	int i, j, el;
> > +	bd_t *bd = gd->bd;
> > +
> > +	/* Setup an identity-mapping for all spaces */
> > +	for (i = 0; i < (PAGE_SIZE >> 3); i++)
> > +		set_pgtable_section(i, MT_DEVICE_nGnRnE);
> > +
> > +	/* Setup an identity-mapping for all RAM space */
> > +	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
> > +		ulong start = bd->bi_dram[i].start;
> > +		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
> > +		for (j = start >> SECTION_SHIFT;
> > +		     j < end >> SECTION_SHIFT; j++) {
> > +			set_pgtable_section(j, MT_NORMAL);
> > +		}
> > +	}
> > +
> > +	/* load TTBR0 */
> > +	el = curent_el();
> > +	if (el == 1)
> > +		asm volatile("msr ttbr0_el1, %0"
> > +			     : : "r" (gd->arch.tlb_addr) : "memory");
> > +	else if (el == 2)
> > +		asm volatile("msr ttbr0_el2, %0"
> > +			     : : "r" (gd->arch.tlb_addr) : "memory");
> > +	else
> > +		panic("Not Supported Exception Level");
> > +
> > +	/* enable the mmu */
> > +	set_sctlr(get_sctlr() | CR_M);
> > +}
> > +
> > +/*
> > + * Performs a invalidation of the entire data cache
> > + * at all levels
> > + */
> > +void invalidate_dcache_all(void)
> > +{
> > +	__asm_flush_dcache_all();
> > +	v8_outer_cache_inval_all();
> > +}
> > +
> > +/*
> > + * Performs a clean & invalidation of the entire data cache
> > + * at all levels
> > + */
> > +void flush_dcache_all(void)
> > +{
> > +	__asm_flush_dcache_all();
> > +	v8_outer_cache_flush_all();
> > +}
> > +
> > +/*
> > + * Invalidates range in all levels of D-cache/unified cache used:
> > + * Affects the range [start, stop - 1]
> > + */
> > +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> > +{
> > +	__asm_flush_dcache_range(start, stop);
> > +	v8_outer_cache_inval_range(start, stop);
> > +}
> > +
> > +/*
> > + * Flush range(clean & invalidate) from all levels of D-cache/unified
> > + * cache used:
> > + * Affects the range [start, stop - 1]
> > + */
> > +void flush_dcache_range(unsigned long start, unsigned long stop)
> > +{
> > +	__asm_flush_dcache_range(start, stop);
> > +	v8_outer_cache_flush_range(start, stop);
> > +}
> > +
> > +void dcache_enable(void)
> > +{
> > +	uint32_t sctlr;
> > +
> > +	sctlr = get_sctlr();
> > +
> > +	/* The data cache is not active unless the mmu is enabled too */
> > +	if (!(sctlr & CR_M)) {
> > +		v8_outer_cache_enable();
> > +		invalidate_dcache_all();
> > +		__asm_invalidate_tlb_all();
> > +		mmu_setup();
> > +	}
> > +
> > +	set_sctlr(sctlr | CR_C);
> > +}
> > +
> > +void dcache_disable(void)
> > +{
> > +	uint32_t sctlr;
> > +
> > +	sctlr = get_sctlr();
> > +
> > +	/* if cache isn't enabled no need to disable */
> > +	if (!(sctlr & CR_C))
> > +		return;
> > +
> > +	set_sctlr(sctlr & ~(CR_C|CR_M));
> > +
> > +	flush_dcache_all();
> > +	__asm_invalidate_tlb_all();
> > +}
> > +
> > +int dcache_status(void)
> > +{
> > +	return (get_sctlr() & CR_C) != 0;
> > +}
> > +
> > +#else	/* CONFIG_SYS_DCACHE_OFF */
> > +
> > +void invalidate_dcache_all(void)
> > +{
> > +}
> > +
> > +void flush_dcache_all(void)
> > +{
> > +}
> > +
> > +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> > +{
> > +}
> > +
> > +void flush_dcache_range(unsigned long start, unsigned long stop)
> > +{
> > +}
> > +
> > +void dcache_enable(void)
> > +{
> > +}
> > +
> > +void dcache_disable(void)
> > +{
> > +}
> > +
> > +int dcache_status(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +#endif	/* CONFIG_SYS_DCACHE_OFF */
> > +
> > +#ifndef CONFIG_SYS_ICACHE_OFF
> > +
> > +void icache_enable(void)
> > +{
> > +	set_sctlr(get_sctlr() | CR_I);
> > +}
> > +
> > +void icache_disable(void)
> > +{
> > +	set_sctlr(get_sctlr() & ~CR_I);
> > +}
> > +
> > +int icache_status(void)
> > +{
> > +	return (get_sctlr() & CR_I) != 0;
> > +}
> > +
> > +void invalidate_icache_all(void)
> > +{
> > +	__asm_invalidate_icache_all();
> > +}
> > +
> > +#else	/* CONFIG_SYS_ICACHE_OFF */
> > +
> > +void icache_enable(void)
> > +{
> > +}
> > +
> > +void icache_disable(void)
> > +{
> > +}
> > +
> > +int icache_status(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +void invalidate_icache_all(void)
> > +{
> > +}
> > +
> > +#endif	/* CONFIG_SYS_ICACHE_OFF */
> > +
> > +/*
> > + * Enable dCache & iCache, whether cache is actually enabled
> > + * depends on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
> > + */
> > +void enable_caches(void)
> > +{
> > +	icache_enable();
> > +	dcache_enable();
> > +}
> > +
> > +/*
> > + * Flush range from all levels of d-cache/unified-cache used:
> > + * Affects the range [start, start + size - 1]
> > + */
> > +void flush_cache(unsigned long start, unsigned long size)
> > +{
> > +	flush_dcache_range(start, start + size);
> > +}
> > +
> > +/*
> > + * Stub implementations for outer cache operations
> > + */
> > +void __v8_outer_cache_enable(void) {}
> > +void v8_outer_cache_enable(void)
> > +	__attribute__((weak, alias("__v8_outer_cache_enable")));
> 
> These can just be:
> 
> void __weak v8_outer_cache_enable(void) {}
> 
This format does not work with aarch64-gcc.

> > +
> > +void __v8_outer_cache_disable(void) {}
> > +void v8_outer_cache_disable(void)
> > +	__attribute__((weak, alias("__v8_outer_cache_disable")));
> > +
> > +void __v8_outer_cache_flush_all(void) {}
> > +void v8_outer_cache_flush_all(void)
> > +	__attribute__((weak, alias("__v8_outer_cache_flush_all")));
> > +
> > +void __v8_outer_cache_inval_all(void) {}
> > +void v8_outer_cache_inval_all(void)
> > +	__attribute__((weak, alias("__v8_outer_cache_inval_all")));
> > +
> > +void __v8_outer_cache_flush_range(u64 start, u64 end) {}
> > +void v8_outer_cache_flush_range(u64 start, u64 end)
> > +	__attribute__((weak, alias("__v8_outer_cache_flush_range")));
> > +
> > +void __v8_outer_cache_inval_range(u64 start, u64 end) {}
> > +void v8_outer_cache_inval_range(u64 start, u64 end)
> > +	__attribute__((weak, alias("__v8_outer_cache_inval_range")));
> > diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
> > new file mode 100644
> > index 0000000..aae2170
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/config.mk
> > @@ -0,0 +1,31 @@
> > +#
> > +# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
> > +#
> > +# See file CREDITS for list of people who contributed to this
> > +# project.
> > +#
> > +# This program is free software; you can redistribute it and/or
> > +# modify it under the terms of the GNU General Public License as
> > +# published by the Free Software Foundation; either version 2 of
> > +# the License, or (at your option) any later version.
> > +#
> > +# This program is distributed in the hope that it will be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program; if not, write to the Free Software
> > +# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > +# MA 02111-1307 USA
> > +#
> > +PLATFORM_RELFLAGS += -fno-common -ffixed-x18
> > +
> > +# SEE README.arm-unaligned-accesses
> > +PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
> > +PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
> > +
> > +PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
> > +PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
> > +PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
> > +PLATFORM_CPPFLAGS += -fpic
> > diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
> > new file mode 100644
> > index 0000000..76e76b6
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/cpu.c
> > @@ -0,0 +1,68 @@
> > +/*
> > + * (C) Copyright 2008 Texas Instruments
> > + *
> > + * (C) Copyright 2002
> > + * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
> > + * Marius Groeger <mgroeger@sysgo.de>
> > + *
> > + * (C) Copyright 2002
> > + * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
> > + *
> > + * SPDX-License-Identifier:	GPL-2.0+
> > + */
> > +
> > +/*
> > + * CPU specific code
> > + */
> > +
> > +#include <common.h>
> > +#include <command.h>
> > +#include <asm/system.h>
> > +#include <linux/compiler.h>
> > +
> > +void __weak cpu_cache_initialization(void){}
> > +
> > +int cleanup_before_linux(void)
> > +{
> > +	/*
> > +	 * this function is called just before we call linux
> > +	 * it prepares the processor for linux
> > +	 *
> > +	 * we turn off caches etc ...
> > +	 */
> > +#ifndef CONFIG_SPL_BUILD
> > +	disable_interrupts();
> > +#endif
> > +
> > +	/*
> > +	 * Turn off I-cache and invalidate it
> > +	 */
> > +	icache_disable();
> > +	invalidate_icache_all();
> > +
> > +	/*
> > +	 * turn off D-cache
> > +	 * dcache_disable() in turn flushes the d-cache and disables MMU
> > +	 */
> > +	dcache_disable();
> > +	v8_outer_cache_disable();
> > +
> > +	/*
> > +	 * After D-cache is flushed and before it is disabled there may
> > +	 * be some new valid entries brought into the cache. We are sure
> > +	 * that these lines are not dirty and will not affect our execution.
> > +	 * (because unwinding the call-stack and setting a bit in CP15 SCTRL
> > +	 * is all we did during this. We have not pushed anything on to the
> > +	 * stack. Neither have we affected any static data)
> > +	 * So just invalidate the entire d-cache again to avoid coherency
> > +	 * problems for kernel
> > +	 */
> > +	invalidate_dcache_all();
> > +
> > +	/*
> > +	 * Some CPU need more cache attention before starting the kernel.
> > +	 */
> > +	cpu_cache_initialization();
> > +
> > +	return 0;
> > +}
> > diff --git a/arch/arm/cpu/armv8/crt0.S b/arch/arm/cpu/armv8/crt0.S
> > new file mode 100644
> > index 0000000..97d6806
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/crt0.S
> > @@ -0,0 +1,130 @@
> > +/*
> > + * crt0 - C-runtime startup Code for AArch64 U-Boot
> > + *
> > + * Copyright (c) 2013  David Feng <fenghua@phytium.com.cn>
> > + *
> > + * Copyright (c) 2012  Albert ARIBAUD <albert.u.boot@aribaud.net>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <config.h>
> > +#include <asm-offsets.h>
> > +#include <asm/macro.h>
> > +#include <linux/linkage.h>
> > +
> > +/*
> > + * This file handles the target-independent stages of the U-Boot
> > + * start-up where a C runtime environment is needed. Its entry point
> > + * is _main and is branched into from the target's start.S file.
> > + *
> > + * _main execution sequence is:
> > + *
> > + * 1. Set up initial environment for calling board_init_f().
> > + *    This environment only provides a stack and a place to store
> > + *    the GD ('global data') structure, both located in some readily
> > + *    available RAM (SRAM, locked cache...). In this context, VARIABLE
> > + *    global data, initialized or not (BSS), are UNAVAILABLE; only
> > + *    CONSTANT initialized data are available.
> > + *
> > + * 2. Call board_init_f(). This function prepares the hardware for
> > + *    execution from system RAM (DRAM, DDR...) As system RAM may not
> > + *    be available yet, board_init_f() must use the current GD to
> > + *    store any data which must be passed on to later stages. These
> > + *    data include the relocation destination, the future stack, and
> > + *    the future GD location.
> > + *
> > + * (the following applies only to non-SPL builds)
> > + *
> > + * 3. Set up intermediate environment where the stack and GD are the
> > + *    ones allocated by board_init_f() in system RAM, but BSS and
> > + *    initialized non-const data are still not available.
> > + *
> > + * 4. Call relocate_code(). This function relocates U-Boot from its
> > + *    current location into the relocation destination computed by
> > + *    board_init_f().
> > + *
> > + * 5. Set up final environment for calling board_init_r(). This
> > + *    environment has BSS (initialized to 0), initialized non-const
> > + *    data (initialized to their intended value), and stack in system
> > + *    RAM. GD has retained values set by board_init_f(). Some CPUs
> > + *    have some work left to do at this point regarding memory, so
> > + *    call c_runtime_cpu_setup.
> > + *
> > + * 6. Branch to board_init_r().
> > + */
> > +
> > +ENTRY(_main)
> > +
> > +/*
> > + * Set up initial C runtime environment and call board_init_f(0).
> > + */
> > +	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
> > +	sub	x0, x0, #GD_SIZE	/* allocate one GD above SP */
> > +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> > +	mov	x18, sp			/* GD is above SP */
> > +	mov	x0, #0
> > +	bl	board_init_f
> > +
> > +/*
> > + * Set up intermediate environment (new sp and gd) and call
> > + * relocate_code(addr_moni). Trick here is that we'll return
> > + * 'here' but relocated.
> > + */
> > +	ldr	x0, [x18, #GD_START_ADDR_SP]	/* x0 <- gd->start_addr_sp */
> > +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> > +	ldr	x18, [x18, #GD_BD]		/* x18 <- gd->bd */
> > +	sub	x18, x18, #GD_SIZE		/* new GD is below bd */
> > +
> > +	adr	lr, relocation_return
> > +	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
> > +	add	lr, lr, x9	/* new return address after relocation */
> > +	ldr	x0, [x18, #GD_RELOCADDR]	/* x0 <- gd->relocaddr */
> > +	b	relocate_code
> > +
> > +relocation_return:
> > +
> > +/*
> > + * Set up final (full) environment
> > + */
> > +	bl	c_runtime_cpu_setup		/* still call old routine */
> > +
> > +/*
> > + * Clear BSS section
> > + */
> > +	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
> > +	ldr	x0, =__bss_start
> > +	add	x0, x0, x9			/* x0 <- __bss_start in RAM */
> > +	ldr	x1, =__bss_end
> > +	add	x1, x1, x9			/* x1 <- __bss_end in RAM */
> > +	mov	x2, #0
> > +clear_loop:
> > +	str	x2, [x0]
> > +	add	x0, x0, #8
> > +	cmp	x0, x1
> > +	b.lo	clear_loop
> > +
> > +	/* call board_init_r(gd_t *id, ulong dest_addr) */
> > +	mov	x0, x18				/* gd_t */
> > +	ldr	x1, [x18, #GD_RELOCADDR]	/* dest_addr */
> > +	b	board_init_r			/* PC relative jump */
> > +
> > +	/* NOTREACHED - board_init_r() does not return */
> > +
> > +ENDPROC(_main)
> > diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
> > new file mode 100644
> > index 0000000..2a3962b
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/exceptions.S
> > @@ -0,0 +1,173 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <asm-offsets.h>
> > +#include <config.h>
> > +#include <version.h>
> > +#include <asm/ptrace.h>
> > +#include <asm/macro.h>
> > +#include <linux/linkage.h>
> > +
> > +/*
> > + * Enter Exception.
> > + * This will save the processor state that is X0~X29/LR/SP/ELR/PSTATE
> > + * to the stack frame.
> > + */
> > +#define	EXCEPTION_ENTRY				\
> > +	sub	sp, sp, S_FRAME_SIZE - S_LR	;\
> > +	push	x28, x29			;\
> > +	push	x26, x27			;\
> > +	push	x24, x25			;\
> > +	push	x22, x23			;\
> > +	push	x20, x21			;\
> > +	push	x18, x19			;\
> > +	push	x16, x17			;\
> > +	push	x14, x15			;\
> > +	push	x12, x13			;\
> > +	push	x10, x11			;\
> > +	push	x8, x9				;\
> > +	push	x6, x7				;\
> > +	push	x4, x5				;\
> > +	push	x2, x3				;\
> > +	push	x0, x1				;\
> > +	add	x21, sp, S_FRAME_SIZE		;\
> > +						;\
> > +	/* Could be running at EL1 or EL2 */	;\
> > +	mrs	x0, CurrentEL			;\
> > +	cmp	x0, 0x4				;\
> > +	b.eq	1f				;\
> > +	cmp	x0, 0x8				;\
> > +	b.eq	2f				;\
> > +	b	3f				;\
> > +1:	mrs	x22, elr_el1			;\
> > +	mrs	x23, spsr_el1			;\
> > +	mrs	x1, esr_el1			;\
> > +	b	3f				;\
> > +2:	mrs	x22, elr_el2			;\
> > +	mrs	x23, spsr_el2			;\
> > +	mrs	x1, esr_el2			;\
> > +3:						;\
> > +	stp	lr, x21, [sp, S_LR]		;\
> > +	stp	x22, x23, [sp, S_PC]		;\
> > +	mov	x0, sp
> > +
> > +/*
> > + * Exit Exception.
> > + * This will restore the processor state that is X0~X29/LR/SP/ELR/PSTATE
> > + * from the stack frame and return from exception.
> > + */
> > +#define	EXCEPTION_EXIT				\
> > +	ldp	x21, x22, [sp, S_PC]		;\
> > +						;\
> > +	/* Could be running at EL1 or EL2 */	;\
> > +	mrs	x0, CurrentEL			;\
> > +	cmp	x0, 0x4				;\
> > +	b.eq	1f				;\
> > +	cmp	x0, 0x8				;\
> > +	b.eq	2f				;\
> > +	b	3f				;\
> > +1:	msr	elr_el1, x21			;\
> > +	msr	spsr_el1, x22			;\
> > +	b	3f				;\
> > +2:	msr	elr_el2, x21			;\
> > +	msr	spsr_el2, x22			;\
> > +3:						;\
> > +	pop	x0, x1				;\
> > +	pop	x2, x3				;\
> > +	pop	x4, x5				;\
> > +	pop	x6, x7				;\
> > +	pop	x8, x9				;\
> > +	pop	x10, x11			;\
> > +	pop	x12, x13			;\
> > +	pop	x14, x15			;\
> > +	pop	x16, x17			;\
> > +	pop	x18, x19			;\
> > +	pop	x20, x21			;\
> > +	pop	x22, x23			;\
> > +	pop	x24, x25			;\
> > +	pop	x26, x27			;\
> > +	pop	x28, x29			;\
> > +	ldr	lr, [sp], S_FRAME_SIZE - S_LR	;\
> > +	eret
> > +
> > +/*
> > + * Exception vectors.
> > + */
> > +	.align	11
> > +	.globl	vectors
> > +vectors:
> > +	.align	7
> > +	b	_do_bad_sync	/* Current EL Synchronous Thread */
> > +
> > +	.align	7
> > +	b	_do_bad_irq	/* Current EL IRQ Thread */
> > +
> > +	.align	7
> > +	b	_do_bad_fiq	/* Current EL FIQ Thread */
> > +
> > +	.align	7
> > +	b	_do_bad_error	/* Current EL Error Thread */
> > +
> > +	.align	7
> > +	b	_do_sync	/* Current EL Synchronous Handler */
> > +
> > +	.align	7
> > +	b	_do_irq		/* Current EL IRQ Handler */
> > +
> > +	.align	7
> > +	b	_do_fiq		/* Current EL FIQ Handler */
> > +
> > +	.align	7
> > +	b	_do_error	/* Current EL Error Handler */
> > +
> > +
> > +_do_bad_sync:
> > +	EXCEPTION_ENTRY
> > +	bl	do_bad_sync
> > +
> > +_do_bad_irq:
> > +	EXCEPTION_ENTRY
> > +	bl	do_bad_irq
> > +
> > +_do_bad_fiq:
> > +	EXCEPTION_ENTRY
> > +	bl	do_bad_fiq
> > +
> > +_do_bad_error:
> > +	EXCEPTION_ENTRY
> > +	bl	do_bad_error
> > +
> > +_do_sync:
> > +	EXCEPTION_ENTRY
> > +	bl	do_sync
> > +
> > +_do_irq:
> > +	EXCEPTION_ENTRY
> > +	bl	do_irq
> > +
> > +_do_fiq:
> > +	EXCEPTION_ENTRY
> > +	bl	do_fiq
> > +
> > +_do_error:
> > +	EXCEPTION_ENTRY
> > +	bl	do_error
> > diff --git a/arch/arm/cpu/armv8/interrupts.c b/arch/arm/cpu/armv8/interrupts.c
> > new file mode 100644
> > index 0000000..7a4e9d9
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/interrupts.c
> > @@ -0,0 +1,158 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <common.h>
> > +
> > +
> > +#ifdef CONFIG_USE_IRQ
> 
> No ARM board actually supports this option and you just define the same
> functions, so the ifdef is pointless.

You are right, I'll remove it.

> 
> > +int interrupt_init(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +/* enable IRQ interrupts */
> > +void enable_interrupts(void)
> > +{
> > +}
> > +
> > +/*
> > + * disable IRQ/FIQ interrupts
> > + * returns true if interrupts had been enabled before we disabled them
> > + */
> > +int disable_interrupts(void)
> > +{
> > +	return 0;
> > +}
> > +#else
> > +int interrupt_init(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +void enable_interrupts(void)
> > +{
> > +	return;
> > +}
> > +int disable_interrupts(void)
> > +{
> > +	return 0;
> > +}
> > +#endif /* CONFIG_USE_IRQ */
> > +
> > +void show_regs(struct pt_regs *regs)
> > +{
> > +	int i;
> > +
> > +	printf("PC:     %lx\n", regs->pc);
> > +	printf("LR:     %lx\n", regs->regs[30]);
> > +	printf("PSTATE: %08lx\n", regs->pstate);
> > +	printf("SP :    %lx\n", regs->sp);
> > +	for (i = 0; i < 30; i += 2)
> > +		printf("x%-2d: %016lx x%-2d: %016lx\n",
> > +		       i, regs->regs[i], i+1, regs->regs[i+1]);
> > +	printf("\n");
> > +}
> > +
> > +/*
> > + * do_bad_sync handles the impossible case in the Synchronous Abort vector.
> > + */
> > +void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +
> > +/*
> > + * do_bad_irq handles the impossible case in the Irq vector.
> > + */
> > +void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +
> > +/*
> > + * do_bad_fiq handles the impossible case in the Fiq vector.
> > + */
> > +void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +
> > +/*
> > + * do_bad_error handles the impossible case in the Error vector.
> > + */
> > +void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +
> > +/*
> > + * do_sync handles the Synchronous Abort exception.
> > + */
> > +void do_sync(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +
> > +/*
> > + * do_irq handles the Irq exception.
> > + */
> > +void do_irq(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("\"Irq\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +
> > +/*
> > + * do_fiq handles the Fiq exception.
> > + */
> > +void do_fiq(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("\"Fiq\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +
> > +/*
> > + * do_error handles the Error exception.
> > + * Errors are more likely to be processor specific,
> > + * it is defined with weak attribute and can be redefined
> > + * in processor specific code.
> > + */
> > +void __do_error(struct pt_regs *pt_regs, unsigned int esr)
> > +{
> > +	printf("\"Error\" handler, esr 0x%08x\n", esr);
> > +	show_regs(pt_regs);
> > +	panic("Resetting CPU ...\n");
> > +}
> > +void do_error(struct pt_regs *pt_regs, unsigned int esr)
> > +	__attribute__((weak, alias("__do_error")));
> 
> Just declare the function __weak.
> 
> > diff --git a/arch/arm/cpu/armv8/relocate.S b/arch/arm/cpu/armv8/relocate.S
> > new file mode 100644
> > index 0000000..6553d6d
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/relocate.S
> > @@ -0,0 +1,73 @@
> > +/*
> > + * relocate - common relocation function for AArch64 U-Boot
> > + *
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <asm-offsets.h>
> > +#include <config.h>
> > +#include <linux/linkage.h>
> > +
> > +/*
> > + * void relocate_code (addr_moni)
> > + *
> > + * This function relocates the monitor code.
> > + *
> > + * NOTE:
> > + * GOT is used and configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
> > + */
> > +ENTRY(relocate_code)
> > +	/*
> > +	 * Copy u-boot from flash to RAM
> > +	 */
> > +	ldr	x1, =__image_copy_start	/* x1 <- copy source */
> > +	cmp	x1, x0
> > +	b.eq	relocate_done		/* skip relocation */
> > +	mov	x2, x0			/* x2 <- copy destination */
> > +	ldr	x3, =__image_copy_end	/* x3 <- source end address */
> > +
> > +copy_loop:
> > +	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
> > +	stp	x10, x11, [x2], #16	/* copy to   target address [x2] */
> > +	cmp	x1, x3			/* until source end address [x3] */
> > +	b.lo	copy_loop
> > +
> > +	/*
> > +	 * Fix .reloc relocations
> > +	 */
> > +	ldr	x9, [x18, #GD_RELOC_OFF]/* x9 <- relocation offset */
> > +	ldr	x1, =__rel_got_start	/* x1 <- rel got start ofs */
> > +	add	x1, x1, x9		/* x1 <- rel got start in RAM */
> > +	ldr	x2, =__rel_got_end	/* x2 <- rel got end ofs */
> > +	add	x2, x2, x9		/* x2 <- rel got end in RAM */
> > +fixloop:
> > +	ldr	x10, [x1]
> > +	add	x10, x10, x9		/* x10 <- address to be fixed up */
> > +	str	x10, [x1]
> > +	add	x1, x1, #8		/* each gotn entry is 8 bytes */
> > +	cmp	x1, x2
> > +	b.lo	fixloop
> > +
> > +relocate_done:
> > +	ret
> > +ENDPROC(relocate_code)
> > diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
> > new file mode 100644
> > index 0000000..a59b711
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/start.S
> > @@ -0,0 +1,253 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <asm-offsets.h>
> > +#include <config.h>
> > +#include <version.h>
> > +#include <linux/linkage.h>
> > +#include <asm/macro.h>
> > +#include <asm/arch/mmu.h>
> > +
> > +/*************************************************************************
> > + *
> > + * Startup Code (reset vector)
> > + *
> > + *************************************************************************/
> > +
> > +.globl _start
> > +_start:
> > +	b	reset
> > +
> > +	.align 3
> > +
> > +.globl _TEXT_BASE
> > +_TEXT_BASE:
> > +	.quad	CONFIG_SYS_TEXT_BASE
> > +
> > +/*
> > + * These are defined in the linker script.
> > + */
> > +.globl	_end_ofs
> > +_end_ofs:
> > +	.quad	_end - _start
> > +
> > +.globl	_bss_start_ofs
> > +_bss_start_ofs:
> > +	.quad	__bss_start - _start
> > +
> > +.globl	_bss_end_ofs
> > +_bss_end_ofs:
> > +	.quad	__bss_end - _start
> > +
> > +reset:
> > +	/*
> > +	 * EL3 initialisation
> > +	 */
> > +	mrs	x0, CurrentEL
> > +	cmp	x0, #0xc			/* EL3? */
> > +	b.ne	reset_nonsecure			/* skip EL3 initialisation */
> > +
> > +	mov	x0, #0x30			/* RES1 */
> > +	orr	x0, x0, #(1 << 0)		/* Non-secure EL1 */
> > +	orr	x0, x0, #(1 << 8)		/* HVC enable */
> > +	orr	x0, x0, #(1 << 10)		/* 64-bit EL2 */
> > +	msr	scr_el3, x0
> > +
> > +	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
> > +
> > +	/* Counter frequency initialisation */
> > +	ldr	x0, =CONFIG_SYS_CNTFRQ
> > +	msr	cntfrq_el0, x0
> > +
> > +	/* GIC initialisation */
> > +	mrs	x0, mpidr_el1
> > +	tst	x0, #15
> > +	b.ne	1f				/* secondary CPU */
> > +
> > +	ldr	x1, =GIC_DIST_BASE		/* GICD_CTLR */
> > +	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
> > +	str	w0, [x1]
> > +
> > +1:	ldr	x1, =GIC_DIST_BASE + 0x80	/* GICD_IGROUPR */
> > +	mov	w0, #~0				/* Grp1 interrupts */
> > +	str	w0, [x1], #4
> > +	b.ne	2f		/* Only local interrupts for secondary CPUs */
> > +	str	w0, [x1], #4
> > +	str	w0, [x1], #4
> > +
> > +2:	ldr	x1, =GIC_CPU_BASE		/* GICC_CTLR */
> > +	ldr	w0, [x1]
> > +	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
> > +	str	w0, [x1]
> > +
> > +	mov	w0, #1 << 7		/* allow NS access to GICC_PMR */
> > +	str	w0, [x1, #4]			/* GICC_PMR */
> > +
> > +	/* SCTLR_EL2 initialisation */
> > +	msr	sctlr_el2, xzr
> > +
> > +#ifdef CONFIG_BOOTING_EL1
> > +	/*
> > +	 * EL2 initialization
> > +	 */
> > +	/* Set EL1 to be 64bit */
> > +	mov	x0, #(1 << 31)
> > +	msr	hcr_el2, x0
> > +
> > +	/* Initialize Generic Timers */
> > +	mrs	x0, cnthctl_el2
> > +	orr	x0, x0, #3		/* Enable EL1 access to timers */
> > +	msr	cnthctl_el2, x0
> > +	msr	cntvoff_el2, x0			/* Clear virtual offset */
> > +	mrs	x0, cntkctl_el1
> > +	orr	x0, x0, #3			/* EL0 access to counters */
> > +	msr	cntkctl_el1, x0
> > +
> > +	/* Initialize ID registers */
> > +	mrs	x0, midr_el1
> > +	mrs	x1, mpidr_el1
> > +	msr	vpidr_el2, x0
> > +	msr	vmpidr_el2, x1
> > +
> > +	/* Coprocessor traps */
> > +	mov	x0, #0x33ff
> > +	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
> > +	msr	hstr_el2, xzr			/* Disable CP15 traps to EL2 */
> > +
> > +	/* SCTLR_EL1 initialization */
> > +	mov	x0, #0x0800
> > +	movk	x0, #0x30d0, lsl #16
> > +	msr	sctlr_el1, x0
> > +#endif
> > +
> > +	/* Return to the EL2_SP1 mode from EL3 */
> > +	adr	x0, reset_nonsecure
> > +#ifdef CONFIG_BOOTING_EL1
> > +	mov	x1, #0x3c5			/* EL1_SP1 | D | A | I | F */
> > +#else
> > +	mov	x1, #0x3c9			/* EL2_SP2 | D | A | I | F */
> > +#endif
> > +	msr	elr_el3, x0
> > +	msr	spsr_el3, x1
> > +	eret
> > +
> > +	/*
> > +	 * MMU Disabled, iCache Disabled, dCache Disabled
> > +	 */
> > +reset_nonsecure:
> > +
> > +	/* Initialize vBAR/CPACR_EL1/MDSCR_EL1 */
> > +	adr	x0, vectors
> > +	switch_el1_el2 x1, 1f, 2f, 3f
> > +1:	msr	vbar_el1, x0
> > +	mov	x0, #3 << 20
> > +	msr	cpacr_el1, x0			/* Enable FP/SIMD */
> > +	msr	mdscr_el1, xzr
> > +	b	3f
> > +2:	msr	vbar_el2, x0
> > +3:
> > +
> > +	/* Cache/BPB/TLB Invalidate */
> > +	bl	__asm_flush_dcache_all		/* dCache invalidate */
> > +	bl	__asm_invalidate_icache_all	/* iCache invalidate */
> > +	bl	__asm_invalidate_tlb_all	/* invalidate I + D TLBs */
> > +
> > +	/* Processor specific initialisation */
> > +#ifndef CONFIG_SKIP_LOWLEVEL_INIT
> > +	bl	lowlevel_init
> > +#endif
> > +
> > +	mrs	x0, mpidr_el1
> > +	tst	x0, #15
> > +	b.eq	master_cpu
> > +
> > +	/*
> > +	 * Secondary CPUs
> > +	 */
> > +slave_cpu:
> > +
> > +	wfe
> > +	ldr	x1, =SECONDARY_CPU_MAILBOX
> > +	ldr	x0, [x1]
> > +	cbz	x0, slave_cpu
> > +	br	x0			/* branch to the given address */
> > +
> > +	/*
> > +	 * Primary CPU
> > +	 */
> > +master_cpu:
> > +
> > +	bl	_main
> > +
> > +/*-------------------------------------------------------------------------*/
> > +
> > +ENTRY(c_runtime_cpu_setup)
> > +	/* If I-cache is enabled invalidate it */
> > +#ifndef CONFIG_SYS_ICACHE_OFF
> > +	ic	iallu			/* I+BTB cache invalidate */
> > +	isb	sy
> > +#endif
> > +
> > +#ifndef CONFIG_SYS_DCACHE_OFF
> > +	/*
> > +	 * Memory region attributes:
> > +	 *
> > +	 *   n = AttrIndx[2:0]
> > +	 *                      n       MAIR
> > +	 *   DEVICE_nGnRnE      000     00000000
> > +	 *   DEVICE_nGnRE       001     00000100
> > +	 *   DEVICE_GRE         010     00001100
> > +	 *   NORMAL_NC          011     01000100
> > +	 *   NORMAL             100     11111111
> > +	 */
> > +	ldr	x0, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
> > +		     MAIR(0x04, MT_DEVICE_nGnRE) | \
> > +		     MAIR(0x0c, MT_DEVICE_GRE) | \
> > +		     MAIR(0x44, MT_NORMAL_NC) | \
> > +		     MAIR(0xff, MT_NORMAL)
> > +
> > +	/*
> > +	 * Set/prepare TCR and TTBR. Using 512GB address range.
> > +	 */
> > +	ldr     x1, =TCR_T0SZ(VA_BITS) | TCR_FLAGS | TCR_TG0_64K
> > +
> > +	switch_el1_el2 x2, 1f, 2f, 3f
> > +1:	orr     x1, x1, TCR_EL1_IPS_40BIT
> > +	msr     mair_el1, x0
> > +	msr     tcr_el1, x1
> > +	b	3f
> > +2:	orr     x1, x1, TCR_EL2_IPS_40BIT
> > +	msr     mair_el2, x0
> > +	msr     tcr_el2, x1
> > +3:
> > +#endif
> > +
> > +	/* Relocate vBAR */
> > +	adr	x0, vectors
> > +	switch_el1_el2 x1, 1f, 2f, 3f
> > +1:	msr	vbar_el1, x0
> > +	b	3f
> > +2:	msr	vbar_el2, x0
> > +3:
> > +
> > +	ret
> > +ENDPROC(c_runtime_cpu_setup)
> > diff --git a/arch/arm/cpu/armv8/timer.c b/arch/arm/cpu/armv8/timer.c
> > new file mode 100644
> > index 0000000..2729e11
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/timer.c
> > @@ -0,0 +1,97 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <common.h>
> > +#include <div64.h>
> > +
> > +/*
> > + * Genertic Timer implementation of __udelay/get_timer/get_ticks/get_tbclk
> > + * functions. If any other timers used, another implementation should be
> > + * placed in platform code.
> > + */
> > +
> > +static inline u64 tick_to_time(u64 tick)
> > +{
> > +	tick *= CONFIG_SYS_HZ;
> > +	do_div(tick, CONFIG_SYS_CNTFRQ);
> 
> You can read the counter frequency at runtime. Then platforms which
> don't start u-boot in EL3 don't need to set this define.
> 
Yes, I'll do it.

> You might want to look at my common timer series. That will greatly
> simplify this code.
> 
> > +	return tick;
> > +}
> > +
> > +static inline u64 time_to_tick(u64 time)
> > +{
> > +	time *= CONFIG_SYS_CNTFRQ;
> > +	do_div(time, CONFIG_SYS_HZ);
> > +	return time;
> > +}
> > +
> > +/*
> > + * Generic timer implementation of get_tbclk()
> > + */
> > +ulong __get_tbclk(void)
> > +{
> > +	return CONFIG_SYS_HZ;
> 
> This should really return the actual counter frequency.
> 
> > +}
> > +ulong get_tbclk(void)
> > +	__attribute__((weak, alias("__get_tbclk")));
> 
> This is not needed.
> 
> 
> > +
> > +/*
> > + * Generic timer implementation of get_timer()
> > + */
> > +ulong __get_timer(ulong base)
> > +{
> > +	u64 cval;
> > +
> > +	isb();
> > +	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
> > +
> > +	tick_to_time(cval);
> > +
> > +	return tick_to_time(cval) - base;
> > +}
> > +ulong get_timer(ulong base)
> > +	__attribute__((weak, alias("__get_timer")));
> 
> This is not needed.
> 
> 
> > +
> > +/*
> > + * Generic timer implementation of get_ticks()
> > + */
> > +unsigned long long __get_ticks(void)
> > +{
> > +	return get_timer(0);
> > +}
> > +unsigned long long get_ticks(void)
> > +	__attribute__((weak, alias("__get_ticks")));
> 
> This is not needed.
> 
> 
> > +
> > +/*
> > + * Generic timer implementation of __udelay()
> > + */
> > +void ___udelay(ulong usec)
> > +{
> > +	unsigned long ticks, limit;
> > +
> > +	limit = get_ticks() + usec/1000;
> 
> This implementation is not ideal as it gives a udelay resolution of 1 msec.
> 
> > +
> > +	do {
> > +		ticks = get_ticks();
> > +	} while (ticks < limit);
> > +}
> > +void __udelay(ulong usec)
> > +	__attribute__((weak, alias("___udelay")));
> 
> This is not needed.
> 

Actually, get_tbclk()/get_timer()/get_ticks() are used by a lot of driver code.
I am trying to keep the behavior of these functions identical to the other implementations.
Some code could be shared between this and your patch;
it would be better to implement that in a separate patch.


> > diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
> > new file mode 100644
> > index 0000000..b6cc376
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/tlb.S
> > @@ -0,0 +1,45 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#include <asm-offsets.h>
> > +#include <config.h>
> > +#include <version.h>
> > +#include <linux/linkage.h>
> > +#include <asm/macro.h>
> > +
> > +/*
> > + * void __asm_invalidate_tlb_all(void)
> > + *
> > + * invalidate all tlb entries.
> > + */
> > +ENTRY(__asm_invalidate_tlb_all)
> > +	switch_el1_el2 x9, 1f, 2f, 3f
> > +1:	tlbi	vmalle1
> > +	dsb	sy
> > +	isb
> > +	b	3f
> > +2:	tlbi	alle2
> > +	dsb	sy
> > +	isb
> > +3:
> > +	ret
> > +ENDPROC(__asm_invalidate_tlb_all)
> > diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
> > new file mode 100644
> > index 0000000..14842e3
> > --- /dev/null
> > +++ b/arch/arm/cpu/armv8/u-boot.lds
> > @@ -0,0 +1,83 @@
> > +/*
> > + * Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
> > +OUTPUT_ARCH(aarch64)
> > +ENTRY(_start)
> > +SECTIONS
> > +{
> > +	. = 0x00000000;
> > +
> > +	. = ALIGN(8);
> > +	.text :
> > +	{
> > +		*(.__image_copy_start)
> > +		CPUDIR/start.o (.text*)
> > +		*(.text*)
> > +	}
> > +
> > +	. = ALIGN(8);
> > +	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
> > +
> > +	. = ALIGN(8);
> > +	.data : {
> > +		*(.data*)
> > +	}
> > +
> > +	. = ALIGN(8);
> > +
> > +	. = .;
> > +
> > +	. = ALIGN(8);
> > +	.u_boot_list : {
> > +		KEEP(*(SORT(.u_boot_list*)));
> > +	}
> > +
> > +	. = ALIGN(8);
> > +	.reloc : {
> > +		__rel_got_start = .;
> > +		*(.got)
> > +		__rel_got_end = .;
> > +	}
> > +
> > +	.image_copy_end :
> > +	{
> > +		*(.__image_copy_end)
> > +	}
> > +
> > +	_end = .;
> > +
> > +	. = ALIGN(8);
> > +	.bss : {
> > +		__bss_start = .;
> > +		*(.bss*)
> > +		 . = ALIGN(8);
> > +		__bss_end = .;
> > +	}
> > +
> > +	/DISCARD/ : { *(.dynsym) }
> > +	/DISCARD/ : { *(.dynstr*) }
> > +	/DISCARD/ : { *(.dynamic*) }
> > +	/DISCARD/ : { *(.plt*) }
> > +	/DISCARD/ : { *(.interp*) }
> > +	/DISCARD/ : { *(.gnu*) }
> > +}
> > diff --git a/arch/arm/include/asm/arch-armv8/gpio.h b/arch/arm/include/asm/arch-armv8/gpio.h
> > new file mode 100644
> > index 0000000..0fbbcaf
> > --- /dev/null
> > +++ b/arch/arm/include/asm/arch-armv8/gpio.h
> > @@ -0,0 +1,26 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#ifndef _ASM_ARMV8_GPIO_H_
> > +#define _ASM_ARMV8_GPIO_H_
> > +
> > +#endif	/* _ASM_ARMV8_GPIO_H_ */
> > diff --git a/arch/arm/include/asm/arch-armv8/mmu.h b/arch/arm/include/asm/arch-armv8/mmu.h
> > new file mode 100644
> > index 0000000..87412fc
> > --- /dev/null
> > +++ b/arch/arm/include/asm/arch-armv8/mmu.h
> > @@ -0,0 +1,117 @@
> > +/*
> > + * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +#ifndef _ASM_ARMV8_MMU_H_
> > +#define _ASM_ARMV8_MMU_H_
> > +
> > +#ifdef __ASSEMBLY__
> > +#define _AC(X, Y)	X
> > +#else
> > +#define _AC(X, Y)	(X##Y)
> > +#endif
> > +
> > +#define UL(x)		_AC(x, UL)
> > +
> > +/***************************************************************/
> > +/*
> > + * The following definitions are related each other, shoud be
> > + * calculated specifically.
> > + */
> > +#define VA_BITS			(39)
> > +
> > +/* PAGE_SHIFT determines the page size */
> > +#undef  PAGE_SIZE
> > +#define PAGE_SHIFT		16
> > +#define PAGE_SIZE		(1 << PAGE_SHIFT)
> > +#define PAGE_MASK		(~(PAGE_SIZE-1))
> > +
> > +/*
> > + * section address mask and size definitions.
> > + */
> > +#define SECTION_SHIFT		29
> > +#define SECTION_SIZE		(UL(1) << SECTION_SHIFT)
> > +#define SECTION_MASK		(~(SECTION_SIZE-1))
> > +/***************************************************************/
> > +
> > +/*
> > + * Memory types available.
> > + */
> > +#define MT_DEVICE_nGnRnE	0
> > +#define MT_DEVICE_nGnRE		1
> > +#define MT_DEVICE_GRE		2
> > +#define MT_NORMAL_NC		3
> > +#define MT_NORMAL		4
> > +
> > +#define MAIR(attr, mt)		((attr) << ((mt) * 8))
> > +
> > +/*
> > + * Hardware page table definitions.
> > + *
> > + * Level 2 descriptor (PMD).
> > + */
> > +#define PMD_TYPE_MASK		(3 << 0)
> > +#define PMD_TYPE_FAULT		(0 << 0)
> > +#define PMD_TYPE_TABLE		(3 << 0)
> > +#define PMD_TYPE_SECT		(1 << 0)
> > +
> > +/*
> > + * Section
> > + */
> > +#define PMD_SECT_S		(3 << 8)
> > +#define PMD_SECT_AF		(1 << 10)
> > +#define PMD_SECT_NG		(1 << 11)
> > +#define PMD_SECT_PXN		(UL(1) << 53)
> > +#define PMD_SECT_UXN		(UL(1) << 54)
> > +
> > +/*
> > + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
> > + */
> > +#define PMD_ATTRINDX(t)		((t) << 2)
> > +#define PMD_ATTRINDX_MASK	(7 << 2)
> > +
> > +/*
> > + * TCR flags.
> > + */
> > +#define TCR_T0SZ(x)		((64 - (x)) << 0)
> > +#define TCR_IRGN_NC		(0 << 8)
> > +#define TCR_IRGN_WBWA		(1 << 8)
> > +#define TCR_IRGN_WT		(2 << 8)
> > +#define TCR_IRGN_WBnWA		(3 << 8)
> > +#define TCR_IRGN_MASK		(3 << 8)
> > +#define TCR_ORGN_NC		(0 << 10)
> > +#define TCR_ORGN_WBWA		(1 << 10)
> > +#define TCR_ORGN_WT		(2 << 10)
> > +#define TCR_ORGN_WBnWA		(3 << 10)
> > +#define TCR_ORGN_MASK		(3 << 10)
> > +#define TCR_SHARED_NON		(0 << 12)
> > +#define TCR_SHARED_OUTER	(1 << 12)
> > +#define TCR_SHARED_INNER	(2 << 12)
> > +#define TCR_TG0_4K		(0 << 14)
> > +#define TCR_TG0_64K		(1 << 14)
> > +#define TCR_TG0_16K		(2 << 14)
> > +#define TCR_EL1_IPS_40BIT	(2 << 32)
> > +#define TCR_EL2_IPS_40BIT	(2 << 16)
> > +
> > +/* PTWs cacheable, inner/outer WBWA not shareable */
> > +#define TCR_FLAGS		(TCR_IRGN_WBWA | TCR_ORGN_WBWA)
> > +
> > +#endif /* _ASM_ARMV8_MMU_H_ */
> > diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
> > index c3489f1..7d3f9e4 100644
> > --- a/arch/arm/include/asm/byteorder.h
> > +++ b/arch/arm/include/asm/byteorder.h
> > @@ -23,10 +23,22 @@
> >  #  define __SWAB_64_THRU_32__
> >  #endif
> >  
> > +#ifdef	CONFIG_ARMV8
> > +
> > +#ifdef __AARCH64EB__
> > +#include <linux/byteorder/big_endian.h>
> > +#else
> > +#include <linux/byteorder/little_endian.h>
> > +#endif
> > +
> > +#else	/* CONFIG_ARMV8 */
> > +
> >  #ifdef __ARMEB__
> >  #include <linux/byteorder/big_endian.h>
> >  #else
> >  #include <linux/byteorder/little_endian.h>
> >  #endif
> >  
> > +#endif	/* CONFIG_ARMV8 */
> > +
> >  #endif
> > diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
> > index 6d60a4a..49a8a88 100644
> > --- a/arch/arm/include/asm/cache.h
> > +++ b/arch/arm/include/asm/cache.h
> > @@ -11,6 +11,8 @@
> >  
> >  #include <asm/system.h>
> >  
> > +#ifndef CONFIG_ARMV8
> > +
> >  /*
> >   * Invalidate L2 Cache using co-proc instruction
> >   */
> > @@ -28,6 +30,9 @@ void l2_cache_disable(void);
> >  void set_section_dcache(int section, enum dcache_option option);
> >  
> >  void dram_bank_mmu_setup(int bank);
> > +
> > +#endif
> > +
> >  /*
> >   * The current upper bound for ARM L1 data cache line sizes is 64 bytes.  We
> >   * use that value for aligning DMA buffers unless the board config has specified
> > diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
> > index 99b703e..30f008e 100644
> > --- a/arch/arm/include/asm/config.h
> > +++ b/arch/arm/include/asm/config.h
> > @@ -9,4 +9,14 @@
> >  
> >  #define CONFIG_LMB
> >  #define CONFIG_SYS_BOOT_RAMDISK_HIGH
> > +
> > +#ifdef CONFIG_ARMV8
> > +/*
> > + * Currently, GOT is used to relocate u-boot and
> > + * configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
> > + */
> > +#define CONFIG_NEEDS_MANUAL_RELOC
> > +#define CONFIG_PHYS_64BIT
> > +#endif
> > +
> >  #endif
> > diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
> > index 79a9597..b30dd5e 100644
> > --- a/arch/arm/include/asm/global_data.h
> > +++ b/arch/arm/include/asm/global_data.h
> > @@ -47,6 +47,10 @@ struct arch_global_data {
> >  
> >  #include <asm-generic/global_data.h>
> >  
> > -#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r8")
> > +#ifdef CONFIG_ARMV8
> > +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("x18")
> > +#else
> > +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("r8")
> > +#endif
> >  
> >  #endif /* __ASM_GBL_DATA_H */
> > diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> > index 1fbc531..6a1f05a 100644
> > --- a/arch/arm/include/asm/io.h
> > +++ b/arch/arm/include/asm/io.h
> > @@ -75,42 +75,45 @@ static inline phys_addr_t virt_to_phys(void * vaddr)
> >  #define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
> >  #define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
> >  
> > -extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen)
> > +extern inline void __raw_writesb(unsigned long addr, const void *data,
> > +				 int bytelen)
> >  {
> >  	uint8_t *buf = (uint8_t *)data;
> >  	while(bytelen--)
> >  		__arch_putb(*buf++, addr);
> >  }
> >  
> > -extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen)
> > +extern inline void __raw_writesw(unsigned long addr, const void *data,
> > +				 int wordlen)
> >  {
> >  	uint16_t *buf = (uint16_t *)data;
> >  	while(wordlen--)
> >  		__arch_putw(*buf++, addr);
> >  }
> >  
> > -extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen)
> > +extern inline void __raw_writesl(unsigned long addr, const void *data,
> > +				 int longlen)
> >  {
> >  	uint32_t *buf = (uint32_t *)data;
> >  	while(longlen--)
> >  		__arch_putl(*buf++, addr);
> >  }
> >  
> > -extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
> > +extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
> >  {
> >  	uint8_t *buf = (uint8_t *)data;
> >  	while(bytelen--)
> >  		*buf++ = __arch_getb(addr);
> >  }
> >  
> > -extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
> > +extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
> >  {
> >  	uint16_t *buf = (uint16_t *)data;
> >  	while(wordlen--)
> >  		*buf++ = __arch_getw(addr);
> >  }
> >  
> > -extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
> > +extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
> >  {
> >  	uint32_t *buf = (uint32_t *)data;
> >  	while(longlen--)
> > diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
> > index ff13f36..40fa300 100644
> > --- a/arch/arm/include/asm/macro.h
> > +++ b/arch/arm/include/asm/macro.h
> > @@ -54,5 +54,39 @@
> >  	bcs	1b
> >  .endm
> >  
> > +#ifdef CONFIG_ARMV8
> > +/*
> > + * Register aliases.
> > + */
> > +lr	.req	x30
> > +
> > +/*
> > + * Store register pairs to stack.
> > + */
> > +.macro	push, xreg1, xreg2
> > +	stp	\xreg1, \xreg2, [sp, #-16]!
> > +.endm
> > +
> > +/*
> > + * Pop register pairs from stack.
> > + */
> > +.macro	pop, xreg1, xreg2
> > +	ldp	\xreg1, \xreg2, [sp], #16
> > +.endm
> > +
> > +/*
> > + * Branch according to exception level
> > + */
> > +.macro	switch_el1_el2, xreg, el1_label, el2_label, fail_label
> > +	mrs	\xreg, CurrentEL
> > +	cmp	\xreg, 0x4
> > +	b.eq	\el1_label
> > +	cmp	\xreg, 0x8
> > +	b.eq	\el2_label
> > +	b	\fail_label
> > +.endm
> > +
> > +#endif /* CONFIG_ARMV8 */
> > +
> >  #endif /* __ASSEMBLY__ */
> >  #endif /* __ASM_ARM_MACRO_H__ */
> > diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
> > index c412486..b2f90e7 100644
> > --- a/arch/arm/include/asm/posix_types.h
> > +++ b/arch/arm/include/asm/posix_types.h
> > @@ -13,6 +13,8 @@
> >  #ifndef __ARCH_ARM_POSIX_TYPES_H
> >  #define __ARCH_ARM_POSIX_TYPES_H
> >  
> > +#include <config.h>
> > +
> >  /*
> >   * This file is generally used by user-level software, so you need to
> >   * be a little careful about namespace pollution etc.  Also, we cannot
> > @@ -28,6 +30,16 @@ typedef int			__kernel_pid_t;
> >  typedef unsigned short		__kernel_ipc_pid_t;
> >  typedef unsigned short		__kernel_uid_t;
> >  typedef unsigned short		__kernel_gid_t;
> > +
> > +#ifdef	CONFIG_ARMV8
> > +typedef unsigned long		__kernel_size_t;
> > +typedef long			__kernel_ssize_t;
> > +typedef long			__kernel_ptrdiff_t;
> > +typedef long			__kernel_time_t;
> > +typedef long			__kernel_suseconds_t;
> > +typedef long			__kernel_clock_t;
> > +typedef long			__kernel_daddr_t;
> > +#else	/* CONFIG_ARMV8 */
> >  typedef unsigned int		__kernel_size_t;
> >  typedef int			__kernel_ssize_t;
> >  typedef int			__kernel_ptrdiff_t;
> > @@ -35,6 +47,8 @@ typedef long			__kernel_time_t;
> >  typedef long			__kernel_suseconds_t;
> >  typedef long			__kernel_clock_t;
> >  typedef int			__kernel_daddr_t;
> > +#endif	/* CONFIG_ARMV8 */
> > +
> >  typedef char *			__kernel_caddr_t;
> >  typedef unsigned short		__kernel_uid16_t;
> >  typedef unsigned short		__kernel_gid16_t;
> > @@ -44,6 +58,9 @@ typedef unsigned int		__kernel_gid32_t;
> >  typedef unsigned short		__kernel_old_uid_t;
> >  typedef unsigned short		__kernel_old_gid_t;
> >  
> > +typedef __kernel_uid_t		__kernel_old_uid_t;
> > +typedef __kernel_gid_t		__kernel_old_gid_t;
> > +
> >  #ifdef __GNUC__
> >  typedef long long		__kernel_loff_t;
> >  #endif
> > diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h
> > index 79cc644..d0cbb06 100644
> > --- a/arch/arm/include/asm/proc-armv/ptrace.h
> > +++ b/arch/arm/include/asm/proc-armv/ptrace.h
> > @@ -12,6 +12,41 @@
> >  
> >  #include <linux/config.h>
> >  
> > +#ifdef CONFIG_ARMV8
> > +
> > +#define PCMASK		0
> > +
> > +#define S_X0		(0)
> > +#define S_X1		(8)
> > +#define S_X2		(16)
> > +#define S_X3		(24)
> > +#define S_X4		(32)
> > +#define S_X5		(40)
> > +#define S_X6		(48)
> > +#define S_X7		(56)
> > +#define S_LR		(240)
> > +#define S_SP		(248)
> > +#define S_PC		(256)
> > +#define S_PSTATE	(264)
> > +#define S_FRAME_SIZE	(272)
> > +
> > +#ifndef __ASSEMBLY__
> > +
> > +/*
> > + * This struct defines the way the registers are stored on the stack during an
> > + * exception.
> > + */
> > +struct pt_regs {
> > +	unsigned long regs[31];
> > +	unsigned long sp;
> > +	unsigned long pc;
> > +	unsigned long pstate;
> > +};
> > +
> > +#endif	/* __ASSEMBLY__ */
> > +
> > +#else	/* CONFIG_ARMV8 */
> > +
> >  #define USR26_MODE	0x00
> >  #define FIQ26_MODE	0x01
> >  #define IRQ26_MODE	0x02
> > @@ -106,4 +141,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
> >  
> >  #endif	/* __ASSEMBLY__ */
> >  
> > +#endif	/* CONFIG_ARMV8 */
> > +
> >  #endif
> > diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h
> > index b4cfa68..17096fc 100644
> > --- a/arch/arm/include/asm/proc-armv/system.h
> > +++ b/arch/arm/include/asm/proc-armv/system.h
> > @@ -15,6 +15,60 @@
> >  /*
> >   * Save the current interrupt enable state & disable IRQs
> >   */
> > +#ifdef CONFIG_ARMV8
> > +
> > +/*
> > + * Save the current interrupt enable state
> > + * and disable IRQs/FIQs
> > + */
> > +#define local_irq_save(flags)					\
> > +	({							\
> > +	asm volatile(						\
> > +	"mrs	%0, daif"					\
> > +	"msr	daifset, #3"					\
> > +	: "=r" (flags)						\
> > +	:							\
> > +	: "memory");						\
> > +	})
> > +
> > +/*
> > + * restore saved IRQ & FIQ state
> > + */
> > +#define local_irq_restore(flags)				\
> > +	({							\
> > +	asm volatile(						\
> > +	"msr	daif, %0"					\
> > +	:							\
> > +	: "r" (flags)						\
> > +	: "memory");						\
> > +	})
> > +
> > +/*
> > + * Enable IRQs/FIQs
> > + */
> > +#define local_irq_enable()					\
> > +	({							\
> > +	asm volatile(						\
> > +	"msr	daifclr, #3"					\
> > +	:							\
> > +	:							\
> > +	: "memory");						\
> > +	})
> > +
> > +/*
> > + * Disable IRQs/FIQs
> > + */
> > +#define local_irq_disable()					\
> > +	({							\
> > +	asm volatile(						\
> > +	"msr	daifset, #3"					\
> > +	:							\
> > +	:							\
> > +	: "memory");						\
> > +	})
> > +
> > +#else	/* CONFIG_ARMV8 */
> > +
> >  #define local_irq_save(x)					\
> >  	({							\
> >  		unsigned long temp;				\
> > @@ -109,7 +163,10 @@
> >  	: "r" (x)						\
> >  	: "memory")
> >  
> > -#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
> > +#endif	/* CONFIG_ARMV8 */
> > +
> > +#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \
> > +	defined(CONFIG_ARMV8)
> >  /*
> >   * On the StrongARM, "swp" is terminally broken since it bypasses the
> >   * cache totally.  This means that the cache becomes inconsistent, and,
> > diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> > index 760345f..0bc2e0f 100644
> > --- a/arch/arm/include/asm/system.h
> > +++ b/arch/arm/include/asm/system.h
> > @@ -1,6 +1,80 @@
> >  #ifndef __ASM_ARM_SYSTEM_H
> >  #define __ASM_ARM_SYSTEM_H
> >  
> > +#ifdef CONFIG_ARMV8
> > +
> > +/*
> > + * SCTLR_EL2 bits definitions
> > + */
> > +#define CR_M		(1 << 0)	/* MMU enable			*/
> > +#define CR_A		(1 << 1)	/* Alignment abort enable	*/
> > +#define CR_C		(1 << 2)	/* Dcache enable		*/
> > +#define CR_SA		(1 << 3)	/* Stack Alignment Check Enable	*/
> > +#define CR_I		(1 << 12)	/* Icache enable		*/
> > +#define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
> > +#define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
> > +
> > +#define PGTABLE_SIZE	(0x10000)
> > +
> > +#ifndef __ASSEMBLY__
> > +
> > +#define isb() __asm__ __volatile__ ("isb" : : : "memory")
> > +
> > +#define wfi() __asm__ __volatile__ ("wfi" : : : "memory")
> > +
> > +static inline unsigned int curent_el(void)
> > +{
> > +	unsigned int el;
> > +	asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
> > +	return el >> 2;
> > +}
> > +
> > +static inline unsigned int get_sctlr(void)
> > +{
> > +	unsigned int el, val;
> > +
> > +	el = curent_el();
> > +	if (el == 1)
> > +		asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
> > +	else if (el == 2)
> > +		asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
> > +	else
> > +		panic("Not Supported Exception Level");
> > +
> > +	return val;
> > +}
> > +
> > +static inline void set_sctlr(unsigned int val)
> > +{
> > +	unsigned int el;
> > +
> > +	el = curent_el();
> > +	if (el == 1)
> > +		asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
> > +	else if (el == 2)
> > +		asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
> > +	else
> > +		panic("Not Supported Exception Level");
> > +
> > +	asm volatile("isb");
> > +}
> > +
> > +void __asm_flush_dcache_all(void);
> > +void __asm_flush_dcache_range(u64 start, u64 end);
> > +void __asm_invalidate_tlb_all(void);
> > +void __asm_invalidate_icache_all(void);
> > +
> > +void v8_outer_cache_enable(void);
> > +void v8_outer_cache_disable(void);
> > +void v8_outer_cache_flush_all(void);
> > +void v8_outer_cache_inval_all(void);
> > +void v8_outer_cache_flush_range(u64 start, u64 end);
> > +void v8_outer_cache_inval_range(u64 start, u64 end);
> > +
> > +#endif	/* __ASSEMBLY__ */
> > +
> > +#else /* CONFIG_ARMV8 */
> > +
> >  #ifdef __KERNEL__
> >  
> >  #define CPU_ARCH_UNKNOWN	0
> > @@ -45,6 +119,8 @@
> >  #define CR_AFE	(1 << 29)	/* Access flag enable			*/
> >  #define CR_TE	(1 << 30)	/* Thumb exception enable		*/
> >  
> > +#define PGTABLE_SIZE		(4096 * 4)
> > +
> >  /*
> >   * This is used to ensure the compiler did actually allocate the register we
> >   * asked it for some inline assembly sequences.  Apparently we can't trust
> > @@ -132,4 +208,6 @@ void mmu_page_table_flush(unsigned long start, unsigned long stop);
> >  
> >  #endif /* __KERNEL__ */
> >  
> > +#endif /* CONFIG_ARMV8 */
> > +
> >  #endif
> > diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
> > index 71dc049..b7794f3 100644
> > --- a/arch/arm/include/asm/types.h
> > +++ b/arch/arm/include/asm/types.h
> > @@ -39,7 +39,11 @@ typedef unsigned int u32;
> >  typedef signed long long s64;
> >  typedef unsigned long long u64;
> >  
> > +#ifdef	CONFIG_ARMV8
> > +#define BITS_PER_LONG 64
> > +#else	/* CONFIG_ARMV8 */
> >  #define BITS_PER_LONG 32
> > +#endif	/* CONFIG_ARMV8 */
> >  
> >  /* Dma addresses are 32-bits wide.  */
> >  
> > diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
> > index 2b5fce8..3ef5538 100644
> > --- a/arch/arm/include/asm/u-boot.h
> > +++ b/arch/arm/include/asm/u-boot.h
> > @@ -44,6 +44,10 @@ typedef struct bd_info {
> >  #endif /* !CONFIG_SYS_GENERIC_BOARD */
> >  
> >  /* For image.h:image_check_target_arch() */
> > +#ifndef CONFIG_ARMV8
> >  #define IH_ARCH_DEFAULT IH_ARCH_ARM
> > +#else
> > +#define IH_ARCH_DEFAULT IH_ARCH_ARM64
> > +#endif
> >  
> >  #endif	/* _U_BOOT_H_ */
> > diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
> > index 44593a8..0a228fb 100644
> > --- a/arch/arm/include/asm/unaligned.h
> > +++ b/arch/arm/include/asm/unaligned.h
> > @@ -8,7 +8,7 @@
> >  /*
> >   * Select endianness
> >   */
> > -#ifndef __ARMEB__
> > +#if __BYTE_ORDER == __LITTLE_ENDIAN
> >  #define get_unaligned	__get_unaligned_le
> >  #define put_unaligned	__put_unaligned_le
> >  #else
> > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> > index 4e78723..86b19e2 100644
> > --- a/arch/arm/lib/Makefile
> > +++ b/arch/arm/lib/Makefile
> > @@ -10,6 +10,7 @@ include $(TOPDIR)/config.mk
> >  LIB	= $(obj)lib$(ARCH).o
> >  LIBGCC	= $(obj)libgcc.o
> >  
> > +ifndef CONFIG_ARMV8
> >  GLSOBJS	+= _ashldi3.o
> >  GLSOBJS	+= _ashrdi3.o
> >  GLSOBJS	+= _divsi3.o
> > @@ -21,9 +22,12 @@ GLSOBJS	+= _umodsi3.o
> >  GLCOBJS	+= div0.o
> >  
> >  SOBJS-y += crt0.o
> > +endif
> >  
> >  ifndef CONFIG_SPL_BUILD
> > +ifndef CONFIG_ARMV8
> >  SOBJS-y += relocate.o
> > +endif
> >  ifndef CONFIG_SYS_GENERIC_BOARD
> >  COBJS-y	+= board.o
> >  endif
> > @@ -38,11 +42,15 @@ else
> >  COBJS-$(CONFIG_SPL_FRAMEWORK) += spl.o
> >  endif
> >  
> > +ifndef CONFIG_ARMV8
> 
> But you have interrupts.c file?

I am just trying to keep armv8-specific code within the armv8 directory.

> 
> >  COBJS-y	+= interrupts.o
> > +endif
> >  COBJS-y	+= reset.o
> >  
> >  COBJS-y	+= cache.o
> > +ifndef CONFIG_ARMV8
> >  COBJS-y	+= cache-cp15.o
> > +endif
> >  
> >  SRCS	:= $(GLSOBJS:.o=.S) $(GLCOBJS:.o=.c) \
> >  	   $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
> > diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
> > index 34f50b0..61a87a8 100644
> > --- a/arch/arm/lib/board.c
> > +++ b/arch/arm/lib/board.c
> > @@ -344,7 +344,7 @@ void board_init_f(ulong bootflag)
> >  
> >  #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
> >  	/* reserve TLB table */
> > -	gd->arch.tlb_size = 4096 * 4;
> > +	gd->arch.tlb_size = PGTABLE_SIZE;
> >  	addr -= gd->arch.tlb_size;
> >  
> >  	/* round down to next 64 kB limit */
> > @@ -419,6 +419,7 @@ void board_init_f(ulong bootflag)
> >  	}
> >  #endif
> >  
> > +#ifndef CONFIG_ARMV8
> >  	/* setup stackpointer for exeptions */
> >  	gd->irq_sp = addr_sp;
> >  #ifdef CONFIG_USE_IRQ
> > @@ -431,6 +432,10 @@ void board_init_f(ulong bootflag)
> >  
> >  	/* 8-byte alignment for ABI compliance */
> >  	addr_sp &= ~0x07;
> > +#else	/* CONFIG_ARMV8 */
> > +	/* 16-byte alignment for ABI compliance */
> > +	addr_sp &= ~0x0f;
> 
> 16-byte alignment will work for all of ARM. You don't really need an
> ifdef here.

It is not only the stack alignment; the IRQ stack is not needed on aarch64 at all.

> 
> > +#endif	/* CONFIG_ARMV8 */
> >  #else
> >  	addr_sp += 128;	/* leave 32 words for abort-stack   */
> >  	gd->irq_sp = addr_sp;
> > @@ -523,6 +528,15 @@ void board_init_r(gd_t *id, ulong dest_addr)
> >  
> >  	debug("monitor flash len: %08lX\n", monitor_flash_len);
> >  	board_init();	/* Setup chipselects */
> > +
> > +#ifdef CONFIG_NEEDS_MANUAL_RELOC
> > +	/*
> > +	 * We have to relocate the command table manually
> > +	 */
> > +	fixup_cmdtable(ll_entry_start(cmd_tbl_t, cmd),
> > +			ll_entry_count(cmd_tbl_t, cmd));
> > +#endif /* CONFIG_NEEDS_MANUAL_RELOC */
> > +
> >  	/*
> >  	 * TODO: printing of the clock inforamtion of the board is now
> >  	 * implemented as part of bdinfo command. Currently only support for
> > diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
> > index eefb456..149cb99 100644
> > --- a/arch/arm/lib/bootm.c
> > +++ b/arch/arm/lib/bootm.c
> > @@ -222,6 +222,21 @@ static void boot_prep_linux(bootm_headers_t *images)
> >  /* Subcommand: GO */
> >  static void boot_jump_linux(bootm_headers_t *images, int flag)
> >  {
> > +#ifdef CONFIG_ARMV8
> > +	void (*kernel_entry)(void *fdt_addr);
> > +	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
> > +
> > +	kernel_entry = (void (*)(void *fdt_addr))images->ep;
> > +
> > +	debug("## Transferring control to Linux (at address %lx)...\n",
> > +	      (ulong) kernel_entry);
> > +	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
> > +
> > +	announce_and_cleanup(fake);
> > +
> > +	if (!fake)
> > +		kernel_entry(images->ft_addr);
> > +#else
> 
> This function could be refactored to avoid duplicating most of it.
> 
> >  	unsigned long machid = gd->bd->bi_arch_number;
> >  	char *s;
> >  	void (*kernel_entry)(int zero, int arch, uint params);
> > @@ -236,8 +251,8 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
> >  		printf("Using machid 0x%lx from environment\n", machid);
> >  	}
> >  
> > -	debug("## Transferring control to Linux (at address %08lx)" \
> > -		"...\n", (ulong) kernel_entry);
> > +	debug("## Transferring control to Linux (at address %08lx)...\n",
> > +	      (ulong) kernel_entry);
> 
> This is an unrelated change.

checkpatch.pl produces a warning for this line, so I reformatted it.

> 
> >  	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
> >  	announce_and_cleanup(fake);
> >  
> > @@ -248,6 +263,7 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
> >  
> >  	if (!fake)
> >  		kernel_entry(0, machid, r2);
> > +#endif
> >  }
> >  
> >  /* Main Entry point for arm bootm implementation
> > diff --git a/common/image.c b/common/image.c
> > index 56a5a62..7182549 100644
> > --- a/common/image.c
> > +++ b/common/image.c
> > @@ -81,6 +81,7 @@ static const table_entry_t uimage_arch[] = {
> >  	{	IH_ARCH_NDS32,		"nds32",	"NDS32",	},
> >  	{	IH_ARCH_OPENRISC,	"or1k",		"OpenRISC 1000",},
> >  	{	IH_ARCH_SANDBOX,	"sandbox",	"Sandbox",	},
> > +	{	IH_ARCH_ARM64,		"arm64",	"AArch64",	},
> >  	{	-1,			"",		"",		},
> >  };
> >  
> > diff --git a/doc/README.armv8 b/doc/README.armv8
> > new file mode 100644
> > index 0000000..d348250
> > --- /dev/null
> > +++ b/doc/README.armv8
> > @@ -0,0 +1,10 @@
> > +Notes:
> > +
> > +1. Currently, u-boot could be running at EL1 or EL2.
> > +
> > +2. GOT is used to relocate u-boot and CONFIG_NEEDS_MANUAL_RELOC is needed.
> > +
> > +3. Fdt should be placed in the first 512 megabytes from the start of the kernel image.
> > +   So, fdt_high should be defined specially. Please reference linux/Documentation/arm64/booting.txt.
> 
> This doesn't sound correct. This should be "512M from the start of RAM."
> The kernel image is at the start of RAM, but you do not necessarily load
> your kernel image in u-boot to that location. A zImage for example is
> loaded somewhere outside the uncompressed kernel location.
> 
> Rob

I took this description from linux/Documentation/arm64/booting.txt.
The current arm64 Linux kernel imposes this requirement.

> 
> > +
> > +4. Generic board is supported.
> > diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
> > index 8fb1765..a58147c 100644
> > --- a/examples/standalone/stubs.c
> > +++ b/examples/standalone/stubs.c
> > @@ -39,6 +39,20 @@ gd_t *global_data;
> >  "	bctr\n"				\
> >  	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
> >  #elif defined(CONFIG_ARM)
> > +#ifdef CONFIG_ARMV8
> > +/*
> > + * x18 holds the pointer to the global_data, x9 is a call-clobbered
> > + * register
> > + */
> > +#define EXPORT_FUNC(x) \
> > +	asm volatile (			\
> > +"	.globl " #x "\n"		\
> > +#x ":\n"				\
> > +"	ldr	x9, [x18, %0]\n"		\
> > +"	ldr	x9, [x9, %1]\n"		\
> > +"	br	x9\n"		\
> > +	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9");
> > +#else
> >  /*
> >   * r8 holds the pointer to the global_data, ip is a call-clobbered
> >   * register
> > @@ -50,6 +64,7 @@ gd_t *global_data;
> >  "	ldr	ip, [r8, %0]\n"		\
> >  "	ldr	pc, [ip, %1]\n"		\
> >  	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
> > +#endif
> >  #elif defined(CONFIG_MIPS)
> >  /*
> >   * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
> > diff --git a/include/image.h b/include/image.h
> > index f93a393..12262d7 100644
> > --- a/include/image.h
> > +++ b/include/image.h
> > @@ -156,6 +156,7 @@ struct lmb;
> >  #define IH_ARCH_SANDBOX		19	/* Sandbox architecture (test only) */
> >  #define IH_ARCH_NDS32	        20	/* ANDES Technology - NDS32  */
> >  #define IH_ARCH_OPENRISC        21	/* OpenRISC 1000  */
> > +#define IH_ARCH_ARM64		22	/* ARM64	*/
> >  
> >  /*
> >   * Image Types
> > 
> 

Best Regards
 David
Scott Wood - Sept. 12, 2013, 1:23 a.m.
On Tue, 2013-09-10 at 16:12 +0800, fenghua@phytium.com.cn wrote:
> +/*
> + * Enter Exception.
> + * This will save the processor state that is X0~X29/LR/SP/ELR/PSTATE
> + * to the stack frame.
> + */
> +#define	EXCEPTION_ENTRY				\

asm macros are nicer.

> +	sub	sp, sp, S_FRAME_SIZE - S_LR	;\
> +	push	x28, x29			;\
> +	push	x26, x27			;\
> +	push	x24, x25			;\
> +	push	x22, x23			;\
> +	push	x20, x21			;\
> +	push	x18, x19			;\
> +	push	x16, x17			;\
> +	push	x14, x15			;\
> +	push	x12, x13			;\
> +	push	x10, x11			;\
> +	push	x8, x9				;\
> +	push	x6, x7				;\
> +	push	x4, x5				;\
> +	push	x2, x3				;\
> +	push	x0, x1				;\
> +	add	x21, sp, S_FRAME_SIZE		;\

You just happened to pick x21 for this on your rewrite?

> +						;\
> +	/* Could be running at EL1 or EL2 */	;\
> +	mrs	x0, CurrentEL			;\
> +	cmp	x0, 0x4				;\
> +	b.eq	1f				;\
> +	cmp	x0, 0x8				;\
> +	b.eq	2f				;\
> +	b	3f				;\
> +1:	mrs	x22, elr_el1			;\
> +	mrs	x23, spsr_el1			;\

Likewise x22 and x23.  Taking Linux code and altering details is not the
same thing as writing from scratch and does not free the code from
GPLv2-only or the need to attribute.

Given how many times we've gone back and forth on this sort of thing,
I'm rather nervous about the legal status of this patchset in general.
What other copying is there that I haven't noticed?

-Scott
fenghua@phytium.com.cn - Sept. 12, 2013, 2:10 a.m.
hi,
    The following code originates from the Linux kernel. I am not sure whether license issues exist.
I list the original code at the bottom. Please check it and give me some advice.
    If license issues actually exist I would like to remove it; the exception-state save sequence simply has to be written this way.

Best regards,
David


U-boot armv8 implementation:
/*                                                                      
 * Enter Exception.                                                     
 * This will save the processor state that is X0~X29/LR/SP/ELR/PSTATE   
 * to the stack frame.                                                  
 */                                                                     
#define	EXCEPTION_ENTRY				\                                         
	       sub	sp, sp, S_FRAME_SIZE - S_LR	;\                                    
	       push	x28, x29			;\                                                
	       push	x26, x27			;\                                                
	       push	x24, x25			;\                                                
	       push	x22, x23			;\                                                
	       push	x20, x21			;\                                                
	       push	x18, x19			;\                                                
	       push	x16, x17			;\                                                
	       push	x14, x15			;\                                                
	       push	x12, x13			;\                                                
	       push	x10, x11			;\                                                
	       push	x8, x9				;\                                                
	       push	x6, x7				;\                                                
	       push	x4, x5				;\                                                
	       push	x2, x3				;\                                                
	       push	x0, x1				;\                                                
	       add	x21, sp, S_FRAME_SIZE		;\                                        
	       					;\                                                          
	       /* Could be running at EL1 or EL2 */	;\                              
	       mrs	x0, CurrentEL			;\                                              
	       cmp	x0, 0x4				;\                                                  
	       b.eq	1f				;\                                                    
	       cmp	x0, 0x8				;\                                                  
	       b.eq	2f				;\                                                    
	       b	3f				;\                                                        
1:       	mrs	x22, elr_el1			;\                                            
	       mrs	x23, spsr_el1			;\                                              
	       mrs	x1, esr_el1			;\                                                
	       b	3f				;\                                                        
2:       	mrs	x22, elr_el2			;\                                            
	       mrs	x23, spsr_el2			;\                                              
	       mrs	x1, esr_el2			;\                                                
3:       						;\                                                        
	       stp	lr, x21, [sp, S_LR]		;\                                          
	       stp	x22, x23, [sp, S_PC]		;\                                        
	       mov	x0, sp                                                            
                                                                        
/*                                                                      
 * Exit Exception.                                                      
 * This will restore the processor state that is X0~X29/LR/SP/ELR/PSTATE
 * from the stack frame and return from exception.                      
 */                                                                     
#define	EXCEPTION_EXIT				\                                         
	       ldp	x21, x22, [sp, S_PC]		;\                                        
	       					;\                                                          
	       /* Could be running at EL1 or EL2 */	;\                              
	       mrs	x0, CurrentEL			;\                                              
	       cmp	x0, 0x4				;\                                                  
	       b.eq	1f				;\                                                    
	       cmp	x0, 0x8				;\                                                  
	       b.eq	2f				;\                                                    
	       b	3f				;\                                                        
1:       	msr	elr_el1, x21			;\                                            
	       msr	spsr_el1, x22			;\                                              
	       b	3f				;\                                                        
2:       	msr	elr_el2, x21			;\                                            
	       msr	spsr_el2, x22			;\                                              
3:       						;\                                                        
	       pop	x0, x1				;\                                                  
	       pop	x2, x3				;\                                                  
	       pop	x4, x5				;\                                                  
	       pop	x6, x7				;\                                                  
	       pop	x8, x9				;\                                                  
	       pop	x10, x11			;\                                                  
	       pop	x12, x13			;\                                                  
	       pop	x14, x15			;\                                                  
	       pop	x16, x17			;\                                                  
	       pop	x18, x19			;\                                                  
	       pop	x20, x21			;\                                                  
	       pop	x22, x23			;\                                                  
	       pop	x24, x25			;\                                                  
	       pop	x26, x27			;\                                                  
	       pop	x28, x29			;\                                                  
	       ldr	lr, [sp], S_FRAME_SIZE - S_LR	;\                                  
	       eret


Linux kernel implimentation:

        .macro  kernel_entry, el, regsize = 64
         sub     sp, sp, #S_FRAME_SIZE - S_LR    // room for LR, SP, SPSR, ELR
         .if     \regsize == 32
         mov     w0, w0                          // zero upper 32 bits of x0
         .endif
         push    x28, x29
         push    x26, x27
         push    x24, x25
         push    x22, x23
         push    x20, x21
         push    x18, x19
         push    x16, x17
         push    x14, x15
         push    x12, x13
         push    x10, x11
         push    x8, x9
         push    x6, x7
         push    x4, x5
         push    x2, x3
         push    x0, x1
         .if     \el == 0
         mrs     x21, sp_el0
         .else
         add     x21, sp, #S_FRAME_SIZE
         .endif
         mrs     x22, elr_el1
         mrs     x23, spsr_el1
         stp     lr, x21, [sp, #S_LR]
         stp     x22, x23, [sp, #S_PC]
 
         /*
          * Set syscallno to -1 by default (overridden later if real syscall).
          */
         .if     \el == 0
         mvn     x21, xzr
         str     x21, [sp, #S_SYSCALLNO]
         .endif
 
         /*
          * Registers that may be useful after this macro is invoked:
          *
          * x21 - aborted SP
          * x22 - aborted PC
          * x23 - aborted PSTATE
         */
         .endm
 
         .macro  kernel_exit, el, ret = 0
         ldp     x21, x22, [sp, #S_PC]           // load ELR, SPSR
         .if     \el == 0
         ldr     x23, [sp, #S_SP]                // load return stack pointer
         .endif
         .if     \ret
         ldr     x1, [sp, #S_X1]                 // preserve x0 (syscall return)
         add     sp, sp, S_X2
         .else
         pop     x0, x1
         .endif
         pop     x2, x3                          // load the rest of the registers
         pop     x4, x5
         pop     x6, x7
         pop     x8, x9
         msr     elr_el1, x21                    // set up the return data
         msr     spsr_el1, x22
         .if     \el == 0
         msr     sp_el0, x23
         .endif
         pop     x10, x11
         pop     x12, x13
         pop     x14, x15
         pop     x16, x17
         pop     x18, x19
         pop     x20, x21
         pop     x22, x23
         pop     x24, x25
         pop     x26, x27
         pop     x28, x29
         ldr     lr, [sp], #S_FRAME_SIZE - S_LR  // load LR and restore SP
         eret                                    // return to kernel
         .endm
 
         .macro  get_thread_info, rd
         mov     \rd, sp
         and     \rd, \rd, #~((1 << 13) - 1)     // top of 8K stack
         .endm
fenghua@phytium.com.cn - Sept. 12, 2013, 4:47 a.m.
> -----原始邮件-----
> 发件人: "Scott Wood" <scottwood@freescale.com>
> 发送时间: 2013年9月12日 星期四
> 收件人: fenghua@phytium.com.cn
> 抄送: u-boot@lists.denx.de, trini@ti.com
> 主题: Re: [U-Boot] [PATCH v7 1/5] core support of arm64
> 
> On Tue, 2013-09-10 at 16:12 +0800, fenghua@phytium.com.cn wrote:
> > +/*
> > + * Enter Exception.
> > + * This will save the processor state that is X0~X29/LR/SP/ELR/PSTATE
> > + * to the stack frame.
> > + */
> > +#define	EXCEPTION_ENTRY				\
> 
> asm macros are nicer.
> 
> > +	sub	sp, sp, S_FRAME_SIZE - S_LR	;\
> > +	push	x28, x29			;\
> > +	push	x26, x27			;\
> > +	push	x24, x25			;\
> > +	push	x22, x23			;\
> > +	push	x20, x21			;\
> > +	push	x18, x19			;\
> > +	push	x16, x17			;\
> > +	push	x14, x15			;\
> > +	push	x12, x13			;\
> > +	push	x10, x11			;\
> > +	push	x8, x9				;\
> > +	push	x6, x7				;\
> > +	push	x4, x5				;\
> > +	push	x2, x3				;\
> > +	push	x0, x1				;\
> > +	add	x21, sp, S_FRAME_SIZE		;\
> 
> You just happened to pick x21 for this on your rewrite?
> 
The preservation of exception state simply has to be done this way.
If this is a problem, I'd like to remove it. Actually, it's unnecessary at the moment.

Best regards,

> > +						;\
> > +	/* Could be running at EL1 or EL2 */	;\
> > +	mrs	x0, CurrentEL			;\
> > +	cmp	x0, 0x4				;\
> > +	b.eq	1f				;\
> > +	cmp	x0, 0x8				;\
> > +	b.eq	2f				;\
> > +	b	3f				;\
> > +1:	mrs	x22, elr_el1			;\
> > +	mrs	x23, spsr_el1			;\
> 
> Likewise x22 and x23.  Taking Linux code and altering details is not the
> same thing as writing from scratch and does not free the code from
> GPLv2-only or the need to attribute.
> 
> Given how many times we've gone back and forth on this sort of thing,
> I'm rather nervous about the legal status of this patchset in general.
> What other copying is there that I haven't noticed?
> 
> -Scott
> 
> 
>
Tom Rini - Sept. 12, 2013, 2:04 p.m.
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 09/11/2013 09:39 AM, FengHua wrote:
> hi Rob, Thank you for your checking of this patch.
> 
>> -----原始邮件----- 发件人: "Rob Herring" <robherring2@gmail.com> 发送时间:
>> 2013年9月11日 星期三 收件人: fenghua@phytium.com.cn 抄送:
>> u-boot@lists.denx.de, trini@ti.com 主题: Re: [U-Boot] [PATCH v7
>> 1/5] core support of arm64
>> 
>> On 09/10/2013 03:12 AM, fenghua@phytium.com.cn wrote:
[snip]
>>> +++ b/arch/arm/cpu/armv8/cache_v8.c
[snip]
>>> +#include <common.h> +#include <asm/system.h> +#include
>>> <asm/arch/mmu.h>
[snip]
>>> +/* + * Stub implementations for outer cache operations + */ 
>>> +void __v8_outer_cache_enable(void) {} +void
>>> v8_outer_cache_enable(void) +	__attribute__((weak,
>>> alias("__v8_outer_cache_enable")));
>> 
>> These can just be:
>> 
>> void __weak v8_outer_cache_enable(void) {}
>> 
> This form does not work with aarch64-gcc.

This file (and the others doing weak attributes) just needs to add
<linux/compiler.h>

- -- 
Tom
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQIcBAEBAgAGBQJSMcnfAAoJENk4IS6UOR1W/b0P/3dwyBIhPbKypNt58cqSz29W
2qCC2IoQZmCyEVRTH/2hLwr1Bw6VZknZovDd2TV959oCUULfScTeOTlgUoqaXDSJ
aVBVfHf9uZWU7aCqVYsyIu1hAHfN82LCVsKIF6uJvHAo4h8rr1x7MeFTi5tmssST
UgWW3xZKeElI8pWwNnSVaAb1yY7X91LbHOCoVP2zbd/k0mxdjHvihIcaxSLv8iTp
vrOJCLBeHj6lpYP9Q68QzDeV0i9XFUyCEIwdF4n/9xTjnvVB6Q+PFAxKfZTCowyp
rnhpMK0wvvKDSg+B2JHdLrUH9RxElfcS1EocRmeku36zH9DEeUzMO2G4W7e+A0YE
E4gL3GsXN3UdLBGCOC1xdV9wAZSM+Mj3bGTExnPhsE0waF70JlDwjyIiGNhtrqfl
+l1Nh8icw6Os4CNDKSS+y8DjNhptlNXoxYxWb/HZP92+jU4mZ95H4Pei8zAfJD50
pvoNhu8jO3gxxG4MzJ6baSpT8tuBerSCISUi22IrJxnztxkiP4EjX7pVFWVhN/Cf
cI5zksWGHoSx+/3vDFYm/p9L5wiA9jAlcGNvE2nAYEtZxS6oEeXY1UrCbpCVMnrv
A7S6YkPUbyRhqjVl0Jwg3HPTJkqp1beOFURFKYsYtL8OM1rRG2rkvcF31uhleSG7
fM0pk3vlasNCp813iR5u
=CWhV
-----END PGP SIGNATURE-----
Tom Rini - Sept. 12, 2013, 2:28 p.m.
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 09/11/2013 10:10 PM, FengHua wrote:
> hi,
>     The following codes are originated from linux kernel. I am not sure whether license issues exist.
> I list the original codes at the bottom. Please check it and give me some advice.
>     If license issues actually exist I would like to remove it. The exception state push action just should be so.

The answer is that you MUST attribute things correctly.  It's OK and
good to bring in code from the kernel.  We just make sure those things
are marked as GPL-2.0 and not GPL-2.0+

- -- 
Tom
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQIcBAEBAgAGBQJSMc9yAAoJENk4IS6UOR1WyAwQAJ/l/b/kd20z1XQXlVQSXjtl
NatCefcuIzsHtGBtXncreWNqbyHdyyp2GRQx7oAKwDd//Y62xnoqNNlTtCkItq+C
W6o6ofI1gYqq5j5GbziwHtt4tZMcB4VpW9OxmPGJgKARrr5Mb6Fy12yAPo9/dUNB
qghlz9PXr3LH4JdaQ761EEBQSNdUWSevotuHtUga+ID/3C+kfEUkjYyQ6STlY1c6
zfr99t1JPnQMESOVjYFdrB/zm3i1RC9qIq03BKxnkwBnPmvedrAhwcu3S8u250oh
fiCn0pqXP7qIn/x+W4VfY2I/K3Fusp3Rc+0NWG5kIfSl++OlK2Q8bvilME05eO02
cZ2Y18v/3mWL+l4XxdyCRVzIR0MjUHBY+nH03byLailebwqMXKDie3/LM+wuf2eB
S407T+yf7LKwRq0Opkjl5oFpr+p6uqovCw+VrtYIVDNExHY0YSvAkoebSOIlPZH4
Nh2QUydTk3W4z+0GkjA52IvZKiC38fKCxVRZzgPQYfz24yLFRHWPAAepx3VYvBeY
q/e0oNBS5JpG+1ZBJdOe5nXgRcYIQqQ5/2bSbyME5fgoBIqPK65JFreDdRCd+kRp
aCAJ8Vbct6rijSM+jrcSSgJ94nxu+DG8N03soFcRvT1C06vkjMnA68nu3mX43CnN
U+9pWTJ1Y+QS0t907HfW
=CM86
-----END PGP SIGNATURE-----

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 6e50fc4..d142307 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1095,6 +1095,10 @@  Sergey Yanovich <ynvich@gmail.com>
 
 	lp8x4x		xscale/pxa
 
+David Feng <fenghua@phytium.com.cn>
+
+	vexpress_aemv8a		ARM ARMV8 (Quad Core)
+
 -------------------------------------------------------------------------
 
 Unknown / orphaned boards:
diff --git a/arch/arm/config.mk b/arch/arm/config.mk
index ce3903b..f1c6a7b 100644
--- a/arch/arm/config.mk
+++ b/arch/arm/config.mk
@@ -74,7 +74,9 @@  endif
 endif
 
 # needed for relocation
+ifndef CONFIG_ARMV8
 LDFLAGS_u-boot += -pie
+endif
 
 #
 # FIXME: binutils versions < 2.22 have a bug in the assembler where
@@ -95,6 +97,8 @@  endif
 endif
 
 # check that only R_ARM_RELATIVE relocations are generated
+ifndef CONFIG_ARMV8
 ifneq ($(CONFIG_SPL_BUILD),y)
 ALL-y	+= checkarmreloc
 endif
+endif
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
new file mode 100644
index 0000000..55fd365
--- /dev/null
+++ b/arch/arm/cpu/armv8/Makefile
@@ -0,0 +1,56 @@ 
+#
+# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+
+include $(TOPDIR)/config.mk
+
+LIB	= $(obj)lib$(CPU).o
+
+START	:= start.o
+
+COBJS	+= cpu.o
+COBJS	+= timer.o
+COBJS	+= cache_v8.o
+COBJS	+= interrupts.o
+
+SOBJS	+= crt0.o
+SOBJS	+= relocate.o
+SOBJS	+= exceptions.o
+SOBJS	+= cache.o
+SOBJS	+= tlb.o
+
+SRCS	:= $(START:.o=.S) $(COBJS:.o=.c)
+OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
+START	:= $(addprefix $(obj),$(START))
+
+all:	$(obj).depend $(START) $(LIB)
+
+$(LIB):	$(OBJS)
+	$(call cmd_link_o_target, $(OBJS))
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
new file mode 100644
index 0000000..050c1c0
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache.S
@@ -0,0 +1,145 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * void __asm_flush_dcache_level(level)
+ *
+ * clean and invalidate one level cache.
+ *
+ * x0: cache level
+ * x1~x9: clobbered
+ */
+ENTRY(__asm_flush_dcache_level)
+	lsl	x1, x0, #1
+	msr	csselr_el1, x1		/* select cache level */
+	isb				/* isb to sync the new csselr & ccsidr */
+	mrs	x6, ccsidr_el1		/* read the new ccsidr */
+	and	x2, x6, #7		/* x2 <- length of the cache lines */
+	add	x2, x2, #4		/* add 4 (line length offset) */
+	mov	x3, #0x3ff
+	and	x3, x3, x6, lsr #3	/* x3 <- maximum number of way size */
+	clz	w5, w3			/* bit position of way size */
+	mov	x4, #0x7fff
+	and	x4, x4, x1, lsr #13	/* x4 <- max number of the set size */
+	/* x1 <- cache level << 1 */
+	/* x2 <- line length offset */
+	/* x3 <- number of cache ways */
+	/* x4 <- number of cache sets */
+	/* x5 <- bit position of way size */
+
+loop_set:
+	mov	x6, x3			/* create working copy of way size */
+loop_way:
+	lsl	x7, x6, x5
+	orr	x9, x0, x7		/* map way and level to cisw value */
+	lsl	x7, x4, x2
+	orr	x9, x9, x7		/* map set number to cisw value */
+	dc	cisw, x9		/* clean & invalidate by set/way */
+	subs	x6, x6, #1		/* decrement the way */
+	b.ge	loop_way
+	subs	x4, x4, #1		/* decrement the set */
+	b.ge	loop_set
+
+	ret
+ENDPROC(__asm_flush_dcache_level)
+
+/*
+ * void __asm_flush_dcache_all(void)
+ *
+ * clean and invalidate all data cache by SET/WAY.
+ */
+ENTRY(__asm_flush_dcache_all)
+	dsb	sy
+	mov	x15, lr
+	mrs	x10, clidr_el1		/* read clidr */
+	lsr	x11, x10, #24
+	and	x11, x11, #0x7		/* x11 <- loc */
+	cbz	x11, finished		/* if loc is 0, no need to clean */
+	mov	x0, #0			/* start flush at cache level 0 */
+	/* x0  <- cache level */
+	/* x10 <- clidr_el1 */
+	/* x11 <- loc */
+
+loop_level:
+	lsl	x1, x0, #1
+	add	x1, x1, x0		/* x1 <- 3x cache level */
+	lsr	x1, x10, x1
+	and	x1, x1, #7		/* x1 <- cache type */
+	cmp	x1, #2
+	b.lt	skip			/* skip if no cache or icache */
+	bl	__asm_flush_dcache_level
+skip:
+	add	x0, x0, #1		/* increment cache level */
+	cmp	x11, x0
+	b.gt	loop_level
+
+finished:
+	mov	x0, #0
+	msr	csselr_el1, x0		/* switch back to cache level 0 */
+	dsb	sy
+	isb
+	mov	lr, x15
+	ret
+ENDPROC(__asm_flush_dcache_all)
+
+/*
+ * void __asm_flush_dcache_range(start, end)
+ *
+ * clean & invalidate data cache in the range
+ *
+ * x0: start address
+ * x1: end address
+ */
+ENTRY(__asm_flush_dcache_range)
+	mrs	x3, ctr_el0		/* read CTR */
+	lsr	x3, x3, #16
+	and	x3, x3, #0xf		/* cache line size encoding */
+	mov	x2, #4			/* bytes per word */
+	lsl	x2, x2, x3		/* actual cache line size */
+
+	/* x2 <- minimal cache line size in cache system */
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:      dc	civac, x0		/* clean & invalidate D/unified line */
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__asm_flush_dcache_range)
+
+/*
+ * void __asm_invalidate_icache_all(void)
+ *
+ * invalidate the entire instruction cache.
+ */
+ENTRY(__asm_invalidate_icache_all)
+	ic	ialluis
+	isb	sy
+	ret
+ENDPROC(__asm_invalidate_icache_all)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
new file mode 100644
index 0000000..56a1489
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -0,0 +1,275 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+#include <asm/system.h>
+#include <asm/arch/mmu.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+
+static void set_pgtable_section(u64 section, u64 memory_type)
+{
+	u64 *page_table = (u64 *)gd->arch.tlb_addr;
+	u64 value;
+
+	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
+	value |= PMD_ATTRINDX(memory_type);
+	page_table[section] = value;
+}
+
+/* to activate the MMU we need to set up virtual memory */
+static inline void mmu_setup(void)
+{
+	int i, j, el;
+	bd_t *bd = gd->bd;
+
+	/* Setup an identity-mapping for all spaces */
+	for (i = 0; i < (PAGE_SIZE >> 3); i++)
+		set_pgtable_section(i, MT_DEVICE_nGnRnE);
+
+	/* Setup an identity-mapping for all RAM space */
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		ulong start = bd->bi_dram[i].start;
+		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
+		for (j = start >> SECTION_SHIFT;
+		     j < end >> SECTION_SHIFT; j++) {
+			set_pgtable_section(j, MT_NORMAL);
+		}
+	}
+
+	/* load TTBR0 */
+	el = curent_el();
+	if (el == 1)
+		asm volatile("msr ttbr0_el1, %0"
+			     : : "r" (gd->arch.tlb_addr) : "memory");
+	else if (el == 2)
+		asm volatile("msr ttbr0_el2, %0"
+			     : : "r" (gd->arch.tlb_addr) : "memory");
+	else
+		panic("Not Supported Exception Level");
+
+	/* enable the mmu */
+	set_sctlr(get_sctlr() | CR_M);
+}
+
+/*
+ * Performs a invalidation of the entire data cache
+ * at all levels
+ */
+void invalidate_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+	v8_outer_cache_inval_all();
+}
+
+/*
+ * Performs a clean & invalidation of the entire data cache
+ * at all levels
+ */
+void flush_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+	v8_outer_cache_flush_all();
+}
+
+/*
+ * Invalidates range in all levels of D-cache/unified cache used:
+ * Affects the range [start, stop - 1]
+ */
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+	v8_outer_cache_inval_range(start, stop);
+}
+
+/*
+ * Flush range(clean & invalidate) from all levels of D-cache/unified
+ * cache used:
+ * Affects the range [start, stop - 1]
+ */
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+	v8_outer_cache_flush_range(start, stop);
+}
+
+void dcache_enable(void)
+{
+	uint32_t sctlr;
+
+	sctlr = get_sctlr();
+
+	/* The data cache is not active unless the mmu is enabled too */
+	if (!(sctlr & CR_M)) {
+		v8_outer_cache_enable();
+		invalidate_dcache_all();
+		__asm_invalidate_tlb_all();
+		mmu_setup();
+	}
+
+	set_sctlr(sctlr | CR_C);
+}
+
+void dcache_disable(void)
+{
+	uint32_t sctlr;
+
+	sctlr = get_sctlr();
+
+	/* if cache isn't enabled no need to disable */
+	if (!(sctlr & CR_C))
+		return;
+
+	set_sctlr(sctlr & ~(CR_C|CR_M));
+
+	flush_dcache_all();
+	__asm_invalidate_tlb_all();
+}
+
+int dcache_status(void)
+{
+	return (get_sctlr() & CR_C) != 0;
+}
+
+#else	/* CONFIG_SYS_DCACHE_OFF */
+
+void invalidate_dcache_all(void)
+{
+}
+
+void flush_dcache_all(void)
+{
+}
+
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void dcache_enable(void)
+{
+}
+
+void dcache_disable(void)
+{
+}
+
+int dcache_status(void)
+{
+	return 0;
+}
+
+#endif	/* CONFIG_SYS_DCACHE_OFF */
+
+#ifndef CONFIG_SYS_ICACHE_OFF
+
+void icache_enable(void)
+{
+	set_sctlr(get_sctlr() | CR_I);
+}
+
+void icache_disable(void)
+{
+	set_sctlr(get_sctlr() & ~CR_I);
+}
+
+int icache_status(void)
+{
+	return (get_sctlr() & CR_I) != 0;
+}
+
+void invalidate_icache_all(void)
+{
+	__asm_invalidate_icache_all();
+}
+
+#else	/* CONFIG_SYS_ICACHE_OFF */
+
+void icache_enable(void)
+{
+}
+
+void icache_disable(void)
+{
+}
+
+int icache_status(void)
+{
+	return 0;
+}
+
+void invalidate_icache_all(void)
+{
+}
+
+#endif	/* CONFIG_SYS_ICACHE_OFF */
+
+/*
+ * Enable dCache & iCache; whether a cache is actually enabled
+ * depends on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
+ */
+void enable_caches(void)
+{
+	icache_enable();
+	dcache_enable();
+}
+
+/*
+ * Flush range from all levels of d-cache/unified-cache used:
+ * Affects the range [start, start + size - 1]
+ */
+void flush_cache(unsigned long start, unsigned long size)
+{
+	flush_dcache_range(start, start + size);
+}
+
+/*
+ * Stub implementations for outer cache operations
+ */
+void __v8_outer_cache_enable(void) {}
+void v8_outer_cache_enable(void)
+	__attribute__((weak, alias("__v8_outer_cache_enable")));
+
+void __v8_outer_cache_disable(void) {}
+void v8_outer_cache_disable(void)
+	__attribute__((weak, alias("__v8_outer_cache_disable")));
+
+void __v8_outer_cache_flush_all(void) {}
+void v8_outer_cache_flush_all(void)
+	__attribute__((weak, alias("__v8_outer_cache_flush_all")));
+
+void __v8_outer_cache_inval_all(void) {}
+void v8_outer_cache_inval_all(void)
+	__attribute__((weak, alias("__v8_outer_cache_inval_all")));
+
+void __v8_outer_cache_flush_range(u64 start, u64 end) {}
+void v8_outer_cache_flush_range(u64 start, u64 end)
+	__attribute__((weak, alias("__v8_outer_cache_flush_range")));
+
+void __v8_outer_cache_inval_range(u64 start, u64 end) {}
+void v8_outer_cache_inval_range(u64 start, u64 end)
+	__attribute__((weak, alias("__v8_outer_cache_inval_range")));
diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
new file mode 100644
index 0000000..aae2170
--- /dev/null
+++ b/arch/arm/cpu/armv8/config.mk
@@ -0,0 +1,31 @@ 
+#
+# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+PLATFORM_RELFLAGS += -fno-common -ffixed-x18
+
+# SEE README.arm-unaligned-accesses
+PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
+PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
+
+PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
+PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
+PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
+PLATFORM_CPPFLAGS += -fpic
diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
new file mode 100644
index 0000000..76e76b6
--- /dev/null
+++ b/arch/arm/cpu/armv8/cpu.c
@@ -0,0 +1,68 @@ 
+/*
+ * (C) Copyright 2008 Texas Instruments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroeger@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * CPU specific code
+ */
+
+#include <common.h>
+#include <command.h>
+#include <asm/system.h>
+#include <linux/compiler.h>
+
+void __weak cpu_cache_initialization(void){}
+
+int cleanup_before_linux(void)
+{
+	/*
+	 * this function is called just before we call linux
+	 * it prepares the processor for linux
+	 *
+	 * we turn off caches etc ...
+	 */
+#ifndef CONFIG_SPL_BUILD
+	disable_interrupts();
+#endif
+
+	/*
+	 * Turn off I-cache and invalidate it
+	 */
+	icache_disable();
+	invalidate_icache_all();
+
+	/*
+	 * turn off D-cache
+	 * dcache_disable() in turn flushes the d-cache and disables MMU
+	 */
+	dcache_disable();
+	v8_outer_cache_disable();
+
+	/*
+	 * After D-cache is flushed and before it is disabled there may
+	 * be some new valid entries brought into the cache. We are sure
+	 * that these lines are not dirty and will not affect our execution.
+	 * (because unwinding the call-stack and setting a bit in CP15 SCTRL
+	 * is all we did during this. We have not pushed anything on to the
+	 * stack. Neither have we affected any static data)
+	 * So just invalidate the entire d-cache again to avoid coherency
+	 * problems for kernel
+	 */
+	invalidate_dcache_all();
+
+	/*
+	 * Some CPUs need more cache attention before starting the kernel.
+	 */
+	cpu_cache_initialization();
+
+	return 0;
+}
diff --git a/arch/arm/cpu/armv8/crt0.S b/arch/arm/cpu/armv8/crt0.S
new file mode 100644
index 0000000..97d6806
--- /dev/null
+++ b/arch/arm/cpu/armv8/crt0.S
@@ -0,0 +1,130 @@ 
+/*
+ * crt0 - C-runtime startup Code for AArch64 U-Boot
+ *
+ * Copyright (c) 2013  David Feng <fenghua@phytium.com.cn>
+ *
+ * Copyright (c) 2012  Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include <asm-offsets.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * This file handles the target-independent stages of the U-Boot
+ * start-up where a C runtime environment is needed. Its entry point
+ * is _main and is branched into from the target's start.S file.
+ *
+ * _main execution sequence is:
+ *
+ * 1. Set up initial environment for calling board_init_f().
+ *    This environment only provides a stack and a place to store
+ *    the GD ('global data') structure, both located in some readily
+ *    available RAM (SRAM, locked cache...). In this context, VARIABLE
+ *    global data, initialized or not (BSS), are UNAVAILABLE; only
+ *    CONSTANT initialized data are available.
+ *
+ * 2. Call board_init_f(). This function prepares the hardware for
+ *    execution from system RAM (DRAM, DDR...) As system RAM may not
+ *    be available yet, board_init_f() must use the current GD to
+ *    store any data which must be passed on to later stages. These
+ *    data include the relocation destination, the future stack, and
+ *    the future GD location.
+ *
+ * (the following applies only to non-SPL builds)
+ *
+ * 3. Set up intermediate environment where the stack and GD are the
+ *    ones allocated by board_init_f() in system RAM, but BSS and
+ *    initialized non-const data are still not available.
+ *
+ * 4. Call relocate_code(). This function relocates U-Boot from its
+ *    current location into the relocation destination computed by
+ *    board_init_f().
+ *
+ * 5. Set up final environment for calling board_init_r(). This
+ *    environment has BSS (initialized to 0), initialized non-const
+ *    data (initialized to their intended value), and stack in system
+ *    RAM. GD has retained values set by board_init_f(). Some CPUs
+ *    have some work left to do at this point regarding memory, so
+ *    call c_runtime_cpu_setup.
+ *
+ * 6. Branch to board_init_r().
+ */
+
+ENTRY(_main)
+
+/*
+ * Set up initial C runtime environment and call board_init_f(0).
+ */
+	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
+	sub	x0, x0, #GD_SIZE	/* allocate one GD above SP */
+	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
+	mov	x18, sp			/* GD is above SP */
+	mov	x0, #0
+	bl	board_init_f
+
+/*
+ * Set up intermediate environment (new sp and gd) and call
+ * relocate_code(addr_moni). Trick here is that we'll return
+ * 'here' but relocated.
+ */
+	ldr	x0, [x18, #GD_START_ADDR_SP]	/* x0 <- gd->start_addr_sp */
+	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
+	ldr	x18, [x18, #GD_BD]		/* x18 <- gd->bd */
+	sub	x18, x18, #GD_SIZE		/* new GD is below bd */
+
+	adr	lr, relocation_return
+	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
+	add	lr, lr, x9	/* new return address after relocation */
+	ldr	x0, [x18, #GD_RELOCADDR]	/* x0 <- gd->relocaddr */
+	b	relocate_code
+
+relocation_return:
+
+/*
+ * Set up final (full) environment
+ */
+	bl	c_runtime_cpu_setup		/* still call old routine */
+
+/*
+ * Clear BSS section
+ */
+	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
+	ldr	x0, =__bss_start
+	add	x0, x0, x9			/* x0 <- __bss_start in RAM */
+	ldr	x1, =__bss_end
+	add	x1, x1, x9			/* x1 <- __bss_end in RAM */
+	mov	x2, #0
+clear_loop:
+	str	x2, [x0]
+	add	x0, x0, #8
+	cmp	x0, x1
+	b.lo	clear_loop
+
+	/* call board_init_r(gd_t *id, ulong dest_addr) */
+	mov	x0, x18				/* gd_t */
+	ldr	x1, [x18, #GD_RELOCADDR]	/* dest_addr */
+	b	board_init_r			/* PC relative jump */
+
+	/* NOTREACHED - board_init_r() does not return */
+
+ENDPROC(_main)
diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
new file mode 100644
index 0000000..2a3962b
--- /dev/null
+++ b/arch/arm/cpu/armv8/exceptions.S
@@ -0,0 +1,173 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/ptrace.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * Enter Exception.
+ * This will save the processor state that is X0~X29/LR/SP/ELR/PSTATE
+ * to the stack frame.
+ */
+#define	EXCEPTION_ENTRY				\
+	sub	sp, sp, S_FRAME_SIZE - S_LR	;\
+	push	x28, x29			;\
+	push	x26, x27			;\
+	push	x24, x25			;\
+	push	x22, x23			;\
+	push	x20, x21			;\
+	push	x18, x19			;\
+	push	x16, x17			;\
+	push	x14, x15			;\
+	push	x12, x13			;\
+	push	x10, x11			;\
+	push	x8, x9				;\
+	push	x6, x7				;\
+	push	x4, x5				;\
+	push	x2, x3				;\
+	push	x0, x1				;\
+	add	x21, sp, S_FRAME_SIZE		;\
+						;\
+	/* Could be running at EL1 or EL2 */	;\
+	mrs	x0, CurrentEL			;\
+	cmp	x0, 0x4				;\
+	b.eq	1f				;\
+	cmp	x0, 0x8				;\
+	b.eq	2f				;\
+	b	3f				;\
+1:	mrs	x22, elr_el1			;\
+	mrs	x23, spsr_el1			;\
+	mrs	x1, esr_el1			;\
+	b	3f				;\
+2:	mrs	x22, elr_el2			;\
+	mrs	x23, spsr_el2			;\
+	mrs	x1, esr_el2			;\
+3:						;\
+	stp	lr, x21, [sp, S_LR]		;\
+	stp	x22, x23, [sp, S_PC]		;\
+	mov	x0, sp
+
+/*
+ * Exit Exception.
+ * This will restore the processor state that is X0~X29/LR/SP/ELR/PSTATE
+ * from the stack frame and return from exception.
+ */
+#define	EXCEPTION_EXIT				\
+	ldp	x21, x22, [sp, S_PC]		;\
+						;\
+	/* Could be running at EL1 or EL2 */	;\
+	mrs	x0, CurrentEL			;\
+	cmp	x0, 0x4				;\
+	b.eq	1f				;\
+	cmp	x0, 0x8				;\
+	b.eq	2f				;\
+	b	3f				;\
+1:	msr	elr_el1, x21			;\
+	msr	spsr_el1, x22			;\
+	b	3f				;\
+2:	msr	elr_el2, x21			;\
+	msr	spsr_el2, x22			;\
+3:						;\
+	pop	x0, x1				;\
+	pop	x2, x3				;\
+	pop	x4, x5				;\
+	pop	x6, x7				;\
+	pop	x8, x9				;\
+	pop	x10, x11			;\
+	pop	x12, x13			;\
+	pop	x14, x15			;\
+	pop	x16, x17			;\
+	pop	x18, x19			;\
+	pop	x20, x21			;\
+	pop	x22, x23			;\
+	pop	x24, x25			;\
+	pop	x26, x27			;\
+	pop	x28, x29			;\
+	ldr	lr, [sp], S_FRAME_SIZE - S_LR	;\
+	eret
+
+/*
+ * Exception vectors.
+ */
+	.align	11
+	.globl	vectors
+vectors:
+	.align	7
+	b	_do_bad_sync	/* Current EL Synchronous Thread */
+
+	.align	7
+	b	_do_bad_irq	/* Current EL IRQ Thread */
+
+	.align	7
+	b	_do_bad_fiq	/* Current EL FIQ Thread */
+
+	.align	7
+	b	_do_bad_error	/* Current EL Error Thread */
+
+	.align	7
+	b	_do_sync	/* Current EL Synchronous Handler */
+
+	.align	7
+	b	_do_irq		/* Current EL IRQ Handler */
+
+	.align	7
+	b	_do_fiq		/* Current EL FIQ Handler */
+
+	.align	7
+	b	_do_error	/* Current EL Error Handler */
+
+
+_do_bad_sync:
+	EXCEPTION_ENTRY
+	bl	do_bad_sync
+
+_do_bad_irq:
+	EXCEPTION_ENTRY
+	bl	do_bad_irq
+
+_do_bad_fiq:
+	EXCEPTION_ENTRY
+	bl	do_bad_fiq
+
+_do_bad_error:
+	EXCEPTION_ENTRY
+	bl	do_bad_error
+
+_do_sync:
+	EXCEPTION_ENTRY
+	bl	do_sync
+
+_do_irq:
+	EXCEPTION_ENTRY
+	bl	do_irq
+
+_do_fiq:
+	EXCEPTION_ENTRY
+	bl	do_fiq
+
+_do_error:
+	EXCEPTION_ENTRY
+	bl	do_error
diff --git a/arch/arm/cpu/armv8/interrupts.c b/arch/arm/cpu/armv8/interrupts.c
new file mode 100644
index 0000000..7a4e9d9
--- /dev/null
+++ b/arch/arm/cpu/armv8/interrupts.c
@@ -0,0 +1,158 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+
+
+#ifdef CONFIG_USE_IRQ
+int interrupt_init(void)
+{
+	return 0;
+}
+
+/* enable IRQ interrupts */
+void enable_interrupts(void)
+{
+}
+
+/*
+ * disable IRQ/FIQ interrupts
+ * returns true if interrupts had been enabled before we disabled them
+ */
+int disable_interrupts(void)
+{
+	return 0;
+}
+#else
+int interrupt_init(void)
+{
+	return 0;
+}
+
+void enable_interrupts(void)
+{
+	return;
+}
+int disable_interrupts(void)
+{
+	return 0;
+}
+#endif /* CONFIG_USE_IRQ */
+
+void show_regs(struct pt_regs *regs)
+{
+	int i;
+
+	printf("PC:     %lx\n", regs->pc);
+	printf("LR:     %lx\n", regs->regs[30]);
+	printf("PSTATE: %08lx\n", regs->pstate);
+	printf("SP :    %lx\n", regs->sp);
+	for (i = 0; i < 30; i += 2)
+		printf("x%-2d: %016lx x%-2d: %016lx\n",
+		       i, regs->regs[i], i+1, regs->regs[i+1]);
+	printf("\n");
+}
+
+/*
+ * do_bad_sync handles the impossible case in the Synchronous Abort vector.
+ */
+void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_irq handles the impossible case in the Irq vector.
+ */
+void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_fiq handles the impossible case in the Fiq vector.
+ */
+void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_error handles the impossible case in the Error vector.
+ */
+void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_sync handles the Synchronous Abort exception.
+ */
+void do_sync(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_irq handles the Irq exception.
+ */
+void do_irq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Irq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_fiq handles the Fiq exception.
+ */
+void do_fiq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Fiq\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_error handles the Error exception.
+ * Errors are more likely to be processor specific,
+ * it is defined with weak attribute and can be redefined
+ * in processor specific code.
+ */
+void __do_error(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("\"Error\" handler, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+void do_error(struct pt_regs *pt_regs, unsigned int esr)
+	__attribute__((weak, alias("__do_error")));
diff --git a/arch/arm/cpu/armv8/relocate.S b/arch/arm/cpu/armv8/relocate.S
new file mode 100644
index 0000000..6553d6d
--- /dev/null
+++ b/arch/arm/cpu/armv8/relocate.S
@@ -0,0 +1,73 @@ 
+/*
+ * relocate - common relocation function for AArch64 U-Boot
+ *
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <linux/linkage.h>
+
+/*
+ * void relocate_code (addr_moni)
+ *
+ * This function relocates the monitor code.
+ *
+ * NOTE:
+ * GOT is used and configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
+ */
+ENTRY(relocate_code)
+	/*
+	 * Copy u-boot from flash to RAM
+	 */
+	ldr	x1, =__image_copy_start	/* x1 <- copy source */
+	cmp	x1, x0
+	b.eq	relocate_done		/* skip relocation */
+	mov	x2, x0			/* x2 <- copy destination */
+	ldr	x3, =__image_copy_end	/* x3 <- source end address */
+
+copy_loop:
+	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
+	stp	x10, x11, [x2], #16	/* copy to   target address [x2] */
+	cmp	x1, x3			/* until source end address [x3] */
+	b.lo	copy_loop
+
+	/*
+	 * Fix .reloc relocations
+	 */
+	ldr	x9, [x18, #GD_RELOC_OFF]/* x9 <- relocation offset */
+	ldr	x1, =__rel_got_start	/* x1 <- rel got start ofs */
+	add	x1, x1, x9		/* x1 <- rel got start in RAM */
+	ldr	x2, =__rel_got_end	/* x2 <- rel got end ofs */
+	add	x2, x2, x9		/* x2 <- rel got end in RAM */
+fixloop:
+	ldr	x10, [x1]
+	add	x10, x10, x9		/* x10 <- address to be fixed up */
+	str	x10, [x1]
+	add	x1, x1, #8		/* each gotn entry is 8 bytes */
+	cmp	x1, x2
+	b.lo	fixloop
+
+relocate_done:
+	ret
+ENDPROC(relocate_code)
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
new file mode 100644
index 0000000..a59b711
--- /dev/null
+++ b/arch/arm/cpu/armv8/start.S
@@ -0,0 +1,253 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+#include <asm/arch/mmu.h>
+
+/*************************************************************************
+ *
+ * Startup Code (reset vector)
+ *
+ *************************************************************************/
+
+.globl _start
+_start:
+	b	reset
+
+	.align 3
+
+.globl _TEXT_BASE
+_TEXT_BASE:
+	.quad	CONFIG_SYS_TEXT_BASE
+
+/*
+ * These are defined in the linker script.
+ */
+.globl	_end_ofs
+_end_ofs:
+	.quad	_end - _start
+
+.globl	_bss_start_ofs
+_bss_start_ofs:
+	.quad	__bss_start - _start
+
+.globl	_bss_end_ofs
+_bss_end_ofs:
+	.quad	__bss_end - _start
+
+reset:
+	/*
+	 * EL3 initialisation
+	 */
+	mrs	x0, CurrentEL
+	cmp	x0, #0xc			/* EL3? */
+	b.ne	reset_nonsecure			/* skip EL3 initialisation */
+
+	mov	x0, #0x30			/* RES1 */
+	orr	x0, x0, #(1 << 0)		/* Non-secure EL1 */
+	orr	x0, x0, #(1 << 8)		/* HVC enable */
+	orr	x0, x0, #(1 << 10)		/* 64-bit EL2 */
+	msr	scr_el3, x0
+
+	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
+
+	/* Counter frequency initialisation */
+	ldr	x0, =CONFIG_SYS_CNTFRQ
+	msr	cntfrq_el0, x0
+
+	/* GIC initialisation */
+	mrs	x0, mpidr_el1
+	tst	x0, #15
+	b.ne	1f				/* secondary CPU */
+
+	ldr	x1, =GIC_DIST_BASE		/* GICD_CTLR */
+	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
+	str	w0, [x1]
+
+1:	ldr	x1, =GIC_DIST_BASE + 0x80	/* GICD_IGROUPR */
+	mov	w0, #~0				/* Grp1 interrupts */
+	str	w0, [x1], #4
+	b.ne	2f		/* Only local interrupts for secondary CPUs */
+	str	w0, [x1], #4
+	str	w0, [x1], #4
+
+2:	ldr	x1, =GIC_CPU_BASE		/* GICC_CTLR */
+	ldr	w0, [x1]
+	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
+	str	w0, [x1]
+
+	mov	w0, #1 << 7		/* allow NS access to GICC_PMR */
+	str	w0, [x1, #4]			/* GICC_PMR */
+
+	/* SCTLR_EL2 initialisation */
+	msr	sctlr_el2, xzr
+
+#ifdef CONFIG_BOOTING_EL1
+	/*
+	 * EL2 initialization
+	 */
+	/* Set EL1 to be 64bit */
+	mov	x0, #(1 << 31)
+	msr	hcr_el2, x0
+
+	/* Initialize Generic Timers */
+	mrs	x0, cnthctl_el2
+	orr	x0, x0, #3		/* Enable EL1 access to timers */
+	msr	cnthctl_el2, x0
+	msr	cntvoff_el2, x0			/* Clear virtual offset */
+	mrs	x0, cntkctl_el1
+	orr	x0, x0, #3			/* EL0 access to counters */
+	msr	cntkctl_el1, x0
+
+	/* Initialize ID registers */
+	mrs	x0, midr_el1
+	mrs	x1, mpidr_el1
+	msr	vpidr_el2, x0
+	msr	vmpidr_el2, x1
+
+	/* Coprocessor traps */
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
+	msr	hstr_el2, xzr			/* Disable CP15 traps to EL2 */
+
+	/* SCTLR_EL1 initialization */
+	mov	x0, #0x0800
+	movk	x0, #0x30d0, lsl #16
+	msr	sctlr_el1, x0
+#endif
+
+	/* Return to the EL2_SP1 mode from EL3 */
+	adr	x0, reset_nonsecure
+#ifdef CONFIG_BOOTING_EL1
+	mov	x1, #0x3c5			/* EL1_SP1 | D | A | I | F */
+#else
+	mov	x1, #0x3c9			/* EL2_SP2 | D | A | I | F */
+#endif
+	msr	elr_el3, x0
+	msr	spsr_el3, x1
+	eret
+
+	/*
+	 * MMU Disabled, iCache Disabled, dCache Disabled
+	 */
+reset_nonsecure:
+
+	/* Initialize vBAR/CPACR_EL1/MDSCR_EL1 */
+	adr	x0, vectors
+	switch_el1_el2 x1, 1f, 2f, 3f
+1:	msr	vbar_el1, x0
+	mov	x0, #3 << 20
+	msr	cpacr_el1, x0			/* Enable FP/SIMD */
+	msr	mdscr_el1, xzr
+	b	3f
+2:	msr	vbar_el2, x0
+3:
+
+	/* Cache/BPB/TLB Invalidate */
+	bl	__asm_flush_dcache_all		/* dCache invalidate */
+	bl	__asm_invalidate_icache_all	/* iCache invalidate */
+	bl	__asm_invalidate_tlb_all	/* invalidate I + D TLBs */
+
+	/* Processor specific initialisation */
+#ifndef CONFIG_SKIP_LOWLEVEL_INIT
+	bl	lowlevel_init
+#endif
+
+	mrs	x0, mpidr_el1
+	tst	x0, #15
+	b.eq	master_cpu
+
+	/*
+	 * Secondary CPUs
+	 */
+slave_cpu:
+
+	wfe
+	ldr	x1, =SECONDARY_CPU_MAILBOX
+	ldr	x0, [x1]
+	cbz	x0, slave_cpu
+	br	x0			/* branch to the given address */
+
+	/*
+	 * Primary CPU
+	 */
+master_cpu:
+
+	bl	_main
+
+/*-------------------------------------------------------------------------*/
+
+ENTRY(c_runtime_cpu_setup)
+	/* If I-cache is enabled invalidate it */
+#ifndef CONFIG_SYS_ICACHE_OFF
+	ic	iallu			/* I+BTB cache invalidate */
+	isb	sy
+#endif
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+	/*
+	 * Memory region attributes:
+	 *
+	 *   n = AttrIndx[2:0]
+	 *                      n       MAIR
+	 *   DEVICE_nGnRnE      000     00000000
+	 *   DEVICE_nGnRE       001     00000100
+	 *   DEVICE_GRE         010     00001100
+	 *   NORMAL_NC          011     01000100
+	 *   NORMAL             100     11111111
+	 */
+	ldr	x0, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
+		     MAIR(0x04, MT_DEVICE_nGnRE) | \
+		     MAIR(0x0c, MT_DEVICE_GRE) | \
+		     MAIR(0x44, MT_NORMAL_NC) | \
+		     MAIR(0xff, MT_NORMAL)
+
+	/*
+	 * Set/prepare TCR and TTBR. Using 512GB address range.
+	 */
+	ldr     x1, =TCR_T0SZ(VA_BITS) | TCR_FLAGS | TCR_TG0_64K
+
+	switch_el1_el2 x2, 1f, 2f, 3f
+1:	orr     x1, x1, TCR_EL1_IPS_40BIT
+	msr     mair_el1, x0
+	msr     tcr_el1, x1
+	b	3f
+2:	orr     x1, x1, TCR_EL2_IPS_40BIT
+	msr     mair_el2, x0
+	msr     tcr_el2, x1
+3:
+#endif
+
+	/* Relocate vBAR */
+	adr	x0, vectors
+	switch_el1_el2 x1, 1f, 2f, 3f
+1:	msr	vbar_el1, x0
+	b	3f
+2:	msr	vbar_el2, x0
+3:
+
+	ret
+ENDPROC(c_runtime_cpu_setup)
diff --git a/arch/arm/cpu/armv8/timer.c b/arch/arm/cpu/armv8/timer.c
new file mode 100644
index 0000000..2729e11
--- /dev/null
+++ b/arch/arm/cpu/armv8/timer.c
@@ -0,0 +1,97 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+#include <div64.h>
+
+/*
+ * Generic timer implementation of __udelay/get_timer/get_ticks/get_tbclk
+ * functions. If any other timer is used, another implementation should be
+ * placed in platform code.
+ */
+
+static inline u64 tick_to_time(u64 tick)
+{
+	tick *= CONFIG_SYS_HZ;
+	do_div(tick, CONFIG_SYS_CNTFRQ);
+	return tick;
+}
+
+static inline u64 time_to_tick(u64 time)
+{
+	time *= CONFIG_SYS_CNTFRQ;
+	do_div(time, CONFIG_SYS_HZ);
+	return time;
+}
+
+/*
+ * Generic timer implementation of get_tbclk()
+ */
+ulong __get_tbclk(void)
+{
+	return CONFIG_SYS_HZ;
+}
+ulong get_tbclk(void)
+	__attribute__((weak, alias("__get_tbclk")));
+
+/*
+ * Generic timer implementation of get_timer()
+ */
+ulong __get_timer(ulong base)
+{
+	u64 cval;
+
+	isb();
+	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
+
+	tick_to_time(cval);
+
+	return tick_to_time(cval) - base;
+}
+ulong get_timer(ulong base)
+	__attribute__((weak, alias("__get_timer")));
+
+/*
+ * Generic timer implementation of get_ticks()
+ */
+unsigned long long __get_ticks(void)
+{
+	return get_timer(0);
+}
+unsigned long long get_ticks(void)
+	__attribute__((weak, alias("__get_ticks")));
+
+/*
+ * Generic timer implementation of __udelay()
+ */
+void ___udelay(ulong usec)
+{
+	unsigned long ticks, limit;
+
+	limit = get_ticks() + usec/1000;
+
+	do {
+		ticks = get_ticks();
+	} while (ticks < limit);
+}
+void __udelay(ulong usec)
+	__attribute__((weak, alias("___udelay")));
diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
new file mode 100644
index 0000000..b6cc376
--- /dev/null
+++ b/arch/arm/cpu/armv8/tlb.S
@@ -0,0 +1,45 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+
+/*
+ * void __asm_invalidate_tlb_all(void)
+ *
+ * invalidate all tlb entries.
+ */
+ENTRY(__asm_invalidate_tlb_all)
+	switch_el1_el2 x9, 1f, 2f, 3f
+1:	tlbi	vmalle1
+	dsb	sy
+	isb
+	b	3f
+2:	tlbi	alle2
+	dsb	sy
+	isb
+3:
+	ret
+ENDPROC(__asm_invalidate_tlb_all)
diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
new file mode 100644
index 0000000..14842e3
--- /dev/null
+++ b/arch/arm/cpu/armv8/u-boot.lds
@@ -0,0 +1,83 @@ 
+/*
+ * Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+ENTRY(_start)
+SECTIONS
+{
+	. = 0x00000000;
+
+	. = ALIGN(8);
+	.text :
+	{
+		*(.__image_copy_start)
+		CPUDIR/start.o (.text*)
+		*(.text*)
+	}
+
+	. = ALIGN(8);
+	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
+
+	. = ALIGN(8);
+	.data : {
+		*(.data*)
+	}
+
+	. = ALIGN(8);
+
+	. = .;
+
+	. = ALIGN(8);
+	.u_boot_list : {
+		KEEP(*(SORT(.u_boot_list*)));
+	}
+
+	. = ALIGN(8);
+	.reloc : {
+		__rel_got_start = .;
+		*(.got)
+		__rel_got_end = .;
+	}
+
+	.image_copy_end :
+	{
+		*(.__image_copy_end)
+	}
+
+	_end = .;
+
+	. = ALIGN(8);
+	.bss : {
+		__bss_start = .;
+		*(.bss*)
+		 . = ALIGN(8);
+		__bss_end = .;
+	}
+
+	/DISCARD/ : { *(.dynsym) }
+	/DISCARD/ : { *(.dynstr*) }
+	/DISCARD/ : { *(.dynamic*) }
+	/DISCARD/ : { *(.plt*) }
+	/DISCARD/ : { *(.interp*) }
+	/DISCARD/ : { *(.gnu*) }
+}
diff --git a/arch/arm/include/asm/arch-armv8/gpio.h b/arch/arm/include/asm/arch-armv8/gpio.h
new file mode 100644
index 0000000..0fbbcaf
--- /dev/null
+++ b/arch/arm/include/asm/arch-armv8/gpio.h
@@ -0,0 +1,26 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARMV8_GPIO_H_
+#define _ASM_ARMV8_GPIO_H_
+
+#endif	/* _ASM_ARMV8_GPIO_H_ */
diff --git a/arch/arm/include/asm/arch-armv8/mmu.h b/arch/arm/include/asm/arch-armv8/mmu.h
new file mode 100644
index 0000000..87412fc
--- /dev/null
+++ b/arch/arm/include/asm/arch-armv8/mmu.h
@@ -0,0 +1,117 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARMV8_MMU_H_
+#define _ASM_ARMV8_MMU_H_
+
+#ifdef __ASSEMBLY__
+#define _AC(X, Y)	X
+#else
+#define _AC(X, Y)	(X##Y)
+#endif
+
+#define UL(x)		_AC(x, UL)
+
+/***************************************************************/
+/*
+ * The following definitions are related to each other and should be
+ * calculated specifically.
+ */
+#define VA_BITS			(39)
+
+/* PAGE_SHIFT determines the page size */
+#undef  PAGE_SIZE
+#define PAGE_SHIFT		16
+#define PAGE_SIZE		(1 << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+/*
+ * section address mask and size definitions.
+ */
+#define SECTION_SHIFT		29
+#define SECTION_SIZE		(UL(1) << SECTION_SHIFT)
+#define SECTION_MASK		(~(SECTION_SIZE-1))
+/***************************************************************/
+
+/*
+ * Memory types available.
+ */
+#define MT_DEVICE_nGnRnE	0
+#define MT_DEVICE_nGnRE		1
+#define MT_DEVICE_GRE		2
+#define MT_NORMAL_NC		3
+#define MT_NORMAL		4
+
+#define MAIR(attr, mt)		((attr) << ((mt) * 8))
+
+/*
+ * Hardware page table definitions.
+ *
+ * Level 2 descriptor (PMD).
+ */
+#define PMD_TYPE_MASK		(3 << 0)
+#define PMD_TYPE_FAULT		(0 << 0)
+#define PMD_TYPE_TABLE		(3 << 0)
+#define PMD_TYPE_SECT		(1 << 0)
+
+/*
+ * Section
+ */
+#define PMD_SECT_S		(3 << 8)
+#define PMD_SECT_AF		(1 << 10)
+#define PMD_SECT_NG		(1 << 11)
+#define PMD_SECT_PXN		(UL(1) << 53)
+#define PMD_SECT_UXN		(UL(1) << 54)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PMD_ATTRINDX(t)		((t) << 2)
+#define PMD_ATTRINDX_MASK	(7 << 2)
+
+/*
+ * TCR flags.
+ */
+#define TCR_T0SZ(x)		((64 - (x)) << 0)
+#define TCR_IRGN_NC		(0 << 8)
+#define TCR_IRGN_WBWA		(1 << 8)
+#define TCR_IRGN_WT		(2 << 8)
+#define TCR_IRGN_WBnWA		(3 << 8)
+#define TCR_IRGN_MASK		(3 << 8)
+#define TCR_ORGN_NC		(0 << 10)
+#define TCR_ORGN_WBWA		(1 << 10)
+#define TCR_ORGN_WT		(2 << 10)
+#define TCR_ORGN_WBnWA		(3 << 10)
+#define TCR_ORGN_MASK		(3 << 10)
+#define TCR_SHARED_NON		(0 << 12)
+#define TCR_SHARED_OUTER	(1 << 12)
+#define TCR_SHARED_INNER	(2 << 12)
+#define TCR_TG0_4K		(0 << 14)
+#define TCR_TG0_64K		(1 << 14)
+#define TCR_TG0_16K		(2 << 14)
+#define TCR_EL1_IPS_40BIT	(UL(2) << 32)
+#define TCR_EL2_IPS_40BIT	(2 << 16)
+
+/* PTWs cacheable, inner/outer WBWA not shareable */
+#define TCR_FLAGS		(TCR_IRGN_WBWA | TCR_ORGN_WBWA)
+
+#endif /* _ASM_ARMV8_MMU_H_ */
diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
index c3489f1..7d3f9e4 100644
--- a/arch/arm/include/asm/byteorder.h
+++ b/arch/arm/include/asm/byteorder.h
@@ -23,10 +23,22 @@ 
 #  define __SWAB_64_THRU_32__
 #endif
 
+#ifdef	CONFIG_ARMV8
+
+#ifdef __AARCH64EB__
+#include <linux/byteorder/big_endian.h>
+#else
+#include <linux/byteorder/little_endian.h>
+#endif
+
+#else	/* CONFIG_ARMV8 */
+
 #ifdef __ARMEB__
 #include <linux/byteorder/big_endian.h>
 #else
 #include <linux/byteorder/little_endian.h>
 #endif
 
+#endif	/* CONFIG_ARMV8 */
+
 #endif
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index 6d60a4a..49a8a88 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -11,6 +11,8 @@ 
 
 #include <asm/system.h>
 
+#ifndef CONFIG_ARMV8
+
 /*
  * Invalidate L2 Cache using co-proc instruction
  */
@@ -28,6 +30,9 @@  void l2_cache_disable(void);
 void set_section_dcache(int section, enum dcache_option option);
 
 void dram_bank_mmu_setup(int bank);
+
+#endif
+
 /*
  * The current upper bound for ARM L1 data cache line sizes is 64 bytes.  We
  * use that value for aligning DMA buffers unless the board config has specified
diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
index 99b703e..30f008e 100644
--- a/arch/arm/include/asm/config.h
+++ b/arch/arm/include/asm/config.h
@@ -9,4 +9,14 @@ 
 
 #define CONFIG_LMB
 #define CONFIG_SYS_BOOT_RAMDISK_HIGH
+
+#ifdef CONFIG_ARMV8
+/*
+ * Currently, GOT is used to relocate u-boot and
+ * configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
+ */
+#define CONFIG_NEEDS_MANUAL_RELOC
+#define CONFIG_PHYS_64BIT
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
index 79a9597..b30dd5e 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -47,6 +47,10 @@  struct arch_global_data {
 
 #include <asm-generic/global_data.h>
 
-#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r8")
+#ifdef CONFIG_ARMV8
+#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("x18")
+#else
+#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("r8")
+#endif
 
 #endif /* __ASM_GBL_DATA_H */
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1fbc531..6a1f05a 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -75,42 +75,45 @@  static inline phys_addr_t virt_to_phys(void * vaddr)
 #define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
 #define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
 
-extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen)
+extern inline void __raw_writesb(unsigned long addr, const void *data,
+				 int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
 		__arch_putb(*buf++, addr);
 }
 
-extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen)
+extern inline void __raw_writesw(unsigned long addr, const void *data,
+				 int wordlen)
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
 		__arch_putw(*buf++, addr);
 }
 
-extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen)
+extern inline void __raw_writesl(unsigned long addr, const void *data,
+				 int longlen)
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
 		__arch_putl(*buf++, addr);
 }
 
-extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
+extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
 		*buf++ = __arch_getb(addr);
 }
 
-extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
+extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
 		*buf++ = __arch_getw(addr);
 }
 
-extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
+extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index ff13f36..40fa300 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -54,5 +54,39 @@ 
 	bcs	1b
 .endm
 
+#ifdef CONFIG_ARMV8
+/*
+ * Register aliases.
+ */
+lr	.req	x30
+
+/*
+ * Store register pairs to stack.
+ */
+.macro	push, xreg1, xreg2
+	stp	\xreg1, \xreg2, [sp, #-16]!
+.endm
+
+/*
+ * Pop register pairs from stack.
+ */
+.macro	pop, xreg1, xreg2
+	ldp	\xreg1, \xreg2, [sp], #16
+.endm
+
+/*
+ * Branch according to exception level
+ */
+.macro	switch_el1_el2, xreg, el1_label, el2_label, fail_label
+	mrs	\xreg, CurrentEL
+	cmp	\xreg, 0x4
+	b.eq	\el1_label
+	cmp	\xreg, 0x8
+	b.eq	\el2_label
+	b	\fail_label
+.endm
+
+#endif /* CONFIG_ARMV8 */
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_ARM_MACRO_H__ */
diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
index c412486..b2f90e7 100644
--- a/arch/arm/include/asm/posix_types.h
+++ b/arch/arm/include/asm/posix_types.h
@@ -13,6 +13,8 @@ 
 #ifndef __ARCH_ARM_POSIX_TYPES_H
 #define __ARCH_ARM_POSIX_TYPES_H
 
+#include <config.h>
+
 /*
  * This file is generally used by user-level software, so you need to
  * be a little careful about namespace pollution etc.  Also, we cannot
@@ -28,6 +30,16 @@  typedef int			__kernel_pid_t;
 typedef unsigned short		__kernel_ipc_pid_t;
 typedef unsigned short		__kernel_uid_t;
 typedef unsigned short		__kernel_gid_t;
+
+#ifdef	CONFIG_ARMV8
+typedef unsigned long		__kernel_size_t;
+typedef long			__kernel_ssize_t;
+typedef long			__kernel_ptrdiff_t;
+typedef long			__kernel_time_t;
+typedef long			__kernel_suseconds_t;
+typedef long			__kernel_clock_t;
+typedef long			__kernel_daddr_t;
+#else	/* CONFIG_ARMV8 */
 typedef unsigned int		__kernel_size_t;
 typedef int			__kernel_ssize_t;
 typedef int			__kernel_ptrdiff_t;
@@ -35,6 +47,8 @@  typedef long			__kernel_time_t;
 typedef long			__kernel_suseconds_t;
 typedef long			__kernel_clock_t;
 typedef int			__kernel_daddr_t;
+#endif	/* CONFIG_ARMV8 */
+
 typedef char *			__kernel_caddr_t;
 typedef unsigned short		__kernel_uid16_t;
 typedef unsigned short		__kernel_gid16_t;
diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h
index 79cc644..d0cbb06 100644
--- a/arch/arm/include/asm/proc-armv/ptrace.h
+++ b/arch/arm/include/asm/proc-armv/ptrace.h
@@ -12,6 +12,41 @@ 
 
 #include <linux/config.h>
 
+#ifdef CONFIG_ARMV8
+
+#define PCMASK		0
+
+#define S_X0		(0)
+#define S_X1		(8)
+#define S_X2		(16)
+#define S_X3		(24)
+#define S_X4		(32)
+#define S_X5		(40)
+#define S_X6		(48)
+#define S_X7		(56)
+#define S_LR		(240)
+#define S_SP		(248)
+#define S_PC		(256)
+#define S_PSTATE	(264)
+#define S_FRAME_SIZE	(272)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This struct defines the way the registers are stored on the stack during an
+ * exception.
+ */
+struct pt_regs {
+	unsigned long regs[31];
+	unsigned long sp;
+	unsigned long pc;
+	unsigned long pstate;
+};
+
+#endif	/* __ASSEMBLY__ */
+
+#else	/* CONFIG_ARMV8 */
+
 #define USR26_MODE	0x00
 #define FIQ26_MODE	0x01
 #define IRQ26_MODE	0x02
@@ -106,4 +141,6 @@  static inline int valid_user_regs(struct pt_regs *regs)
 
 #endif	/* __ASSEMBLY__ */
 
+#endif	/* CONFIG_ARMV8 */
+
 #endif
diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h
index b4cfa68..17096fc 100644
--- a/arch/arm/include/asm/proc-armv/system.h
+++ b/arch/arm/include/asm/proc-armv/system.h
@@ -15,6 +15,60 @@ 
 /*
  * Save the current interrupt enable state & disable IRQs
  */
+#ifdef CONFIG_ARMV8
+
+/*
+ * Save the current interrupt enable state
+ * and disable IRQs/FIQs
+ */
+#define local_irq_save(flags)					\
+	({							\
+	asm volatile(						\
+	"mrs	%0, daif\n"					\
+	"msr	daifset, #3"					\
+	: "=r" (flags)						\
+	:							\
+	: "memory");						\
+	})
+
+/*
+ * restore saved IRQ & FIQ state
+ */
+#define local_irq_restore(flags)				\
+	({							\
+	asm volatile(						\
+	"msr	daif, %0"					\
+	:							\
+	: "r" (flags)						\
+	: "memory");						\
+	})
+
+/*
+ * Enable IRQs/FIQs
+ */
+#define local_irq_enable()					\
+	({							\
+	asm volatile(						\
+	"msr	daifclr, #3"					\
+	:							\
+	:							\
+	: "memory");						\
+	})
+
+/*
+ * Disable IRQs/FIQs
+ */
+#define local_irq_disable()					\
+	({							\
+	asm volatile(						\
+	"msr	daifset, #3"					\
+	:							\
+	:							\
+	: "memory");						\
+	})
+
+#else	/* CONFIG_ARMV8 */
+
 #define local_irq_save(x)					\
 	({							\
 		unsigned long temp;				\
@@ -109,7 +163,10 @@ 
 	: "r" (x)						\
 	: "memory")
 
-#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
+#endif	/* CONFIG_ARMV8 */
+
+#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \
+	defined(CONFIG_ARMV8)
 /*
  * On the StrongARM, "swp" is terminally broken since it bypasses the
  * cache totally.  This means that the cache becomes inconsistent, and,
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 760345f..0bc2e0f 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -1,6 +1,80 @@ 
 #ifndef __ASM_ARM_SYSTEM_H
 #define __ASM_ARM_SYSTEM_H
 
+#ifdef CONFIG_ARMV8
+
+/*
+ * SCTLR_EL2 bits definitions
+ */
+#define CR_M		(1 << 0)	/* MMU enable			*/
+#define CR_A		(1 << 1)	/* Alignment abort enable	*/
+#define CR_C		(1 << 2)	/* Dcache enable		*/
+#define CR_SA		(1 << 3)	/* Stack Alignment Check Enable	*/
+#define CR_I		(1 << 12)	/* Icache enable		*/
+#define CR_WXN		(1 << 19)	/* Write Permission Imply XN	*/
+#define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
+
+#define PGTABLE_SIZE	(0x10000)
+
+#ifndef __ASSEMBLY__
+
+#define isb() __asm__ __volatile__ ("isb" : : : "memory")
+
+#define wfi() __asm__ __volatile__ ("wfi" : : : "memory")
+
+static inline unsigned int curent_el(void)
+{
+	unsigned int el;
+	asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
+	return el >> 2;
+}
+
+static inline unsigned int get_sctlr(void)
+{
+	unsigned int el, val;
+
+	el = curent_el();
+	if (el == 1)
+		asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
+	else if (el == 2)
+		asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
+	else
+		panic("Not Supported Exception Level");
+
+	return val;
+}
+
+static inline void set_sctlr(unsigned int val)
+{
+	unsigned int el;
+
+	el = curent_el();
+	if (el == 1)
+		asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
+	else if (el == 2)
+		asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
+	else
+		panic("Not Supported Exception Level");
+
+	asm volatile("isb");
+}
+
+void __asm_flush_dcache_all(void);
+void __asm_flush_dcache_range(u64 start, u64 end);
+void __asm_invalidate_tlb_all(void);
+void __asm_invalidate_icache_all(void);
+
+void v8_outer_cache_enable(void);
+void v8_outer_cache_disable(void);
+void v8_outer_cache_flush_all(void);
+void v8_outer_cache_inval_all(void);
+void v8_outer_cache_flush_range(u64 start, u64 end);
+void v8_outer_cache_inval_range(u64 start, u64 end);
+
+#endif	/* __ASSEMBLY__ */
+
+#else /* CONFIG_ARMV8 */
+
 #ifdef __KERNEL__
 
 #define CPU_ARCH_UNKNOWN	0
@@ -45,6 +119,8 @@ 
 #define CR_AFE	(1 << 29)	/* Access flag enable			*/
 #define CR_TE	(1 << 30)	/* Thumb exception enable		*/
 
+#define PGTABLE_SIZE		(4096 * 4)
+
 /*
  * This is used to ensure the compiler did actually allocate the register we
  * asked it for some inline assembly sequences.  Apparently we can't trust
@@ -132,4 +208,6 @@  void mmu_page_table_flush(unsigned long start, unsigned long stop);
 
 #endif /* __KERNEL__ */
 
+#endif /* CONFIG_ARMV8 */
+
 #endif
diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
index 71dc049..b7794f3 100644
--- a/arch/arm/include/asm/types.h
+++ b/arch/arm/include/asm/types.h
@@ -39,7 +39,11 @@  typedef unsigned int u32;
 typedef signed long long s64;
 typedef unsigned long long u64;
 
+#ifdef	CONFIG_ARMV8
+#define BITS_PER_LONG 64
+#else	/* CONFIG_ARMV8 */
 #define BITS_PER_LONG 32
+#endif	/* CONFIG_ARMV8 */
 
 /* Dma addresses are 32-bits wide.  */
 
diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
index 2b5fce8..3ef5538 100644
--- a/arch/arm/include/asm/u-boot.h
+++ b/arch/arm/include/asm/u-boot.h
@@ -44,6 +44,10 @@  typedef struct bd_info {
 #endif /* !CONFIG_SYS_GENERIC_BOARD */
 
 /* For image.h:image_check_target_arch() */
+#ifndef CONFIG_ARMV8
 #define IH_ARCH_DEFAULT IH_ARCH_ARM
+#else
+#define IH_ARCH_DEFAULT IH_ARCH_ARM64
+#endif
 
 #endif	/* _U_BOOT_H_ */
diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
index 44593a8..0a228fb 100644
--- a/arch/arm/include/asm/unaligned.h
+++ b/arch/arm/include/asm/unaligned.h
@@ -8,7 +8,7 @@ 
 /*
  * Select endianness
  */
-#ifndef __ARMEB__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 #define get_unaligned	__get_unaligned_le
 #define put_unaligned	__put_unaligned_le
 #else
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 4e78723..86b19e2 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -10,6 +10,7 @@  include $(TOPDIR)/config.mk
 LIB	= $(obj)lib$(ARCH).o
 LIBGCC	= $(obj)libgcc.o
 
+ifndef CONFIG_ARMV8
 GLSOBJS	+= _ashldi3.o
 GLSOBJS	+= _ashrdi3.o
 GLSOBJS	+= _divsi3.o
@@ -21,9 +22,12 @@  GLSOBJS	+= _umodsi3.o
 GLCOBJS	+= div0.o
 
 SOBJS-y += crt0.o
+endif
 
 ifndef CONFIG_SPL_BUILD
+ifndef CONFIG_ARMV8
 SOBJS-y += relocate.o
+endif
 ifndef CONFIG_SYS_GENERIC_BOARD
 COBJS-y	+= board.o
 endif
@@ -38,11 +42,15 @@  else
 COBJS-$(CONFIG_SPL_FRAMEWORK) += spl.o
 endif
 
+ifndef CONFIG_ARMV8
 COBJS-y	+= interrupts.o
+endif
 COBJS-y	+= reset.o
 
 COBJS-y	+= cache.o
+ifndef CONFIG_ARMV8
 COBJS-y	+= cache-cp15.o
+endif
 
 SRCS	:= $(GLSOBJS:.o=.S) $(GLCOBJS:.o=.c) \
 	   $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
index 34f50b0..61a87a8 100644
--- a/arch/arm/lib/board.c
+++ b/arch/arm/lib/board.c
@@ -344,7 +344,7 @@  void board_init_f(ulong bootflag)
 
 #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
 	/* reserve TLB table */
-	gd->arch.tlb_size = 4096 * 4;
+	gd->arch.tlb_size = PGTABLE_SIZE;
 	addr -= gd->arch.tlb_size;
 
 	/* round down to next 64 kB limit */
@@ -419,6 +419,7 @@  void board_init_f(ulong bootflag)
 	}
 #endif
 
+#ifndef CONFIG_ARMV8
 	/* setup stackpointer for exeptions */
 	gd->irq_sp = addr_sp;
 #ifdef CONFIG_USE_IRQ
@@ -431,6 +432,10 @@  void board_init_f(ulong bootflag)
 
 	/* 8-byte alignment for ABI compliance */
 	addr_sp &= ~0x07;
+#else	/* CONFIG_ARMV8 */
+	/* 16-byte alignment for ABI compliance */
+	addr_sp &= ~0x0f;
+#endif	/* CONFIG_ARMV8 */
 #else
 	addr_sp += 128;	/* leave 32 words for abort-stack   */
 	gd->irq_sp = addr_sp;
@@ -523,6 +528,15 @@  void board_init_r(gd_t *id, ulong dest_addr)
 
 	debug("monitor flash len: %08lX\n", monitor_flash_len);
 	board_init();	/* Setup chipselects */
+
+#ifdef CONFIG_NEEDS_MANUAL_RELOC
+	/*
+	 * We have to relocate the command table manually
+	 */
+	fixup_cmdtable(ll_entry_start(cmd_tbl_t, cmd),
+			ll_entry_count(cmd_tbl_t, cmd));
+#endif /* CONFIG_NEEDS_MANUAL_RELOC */
+
 	/*
 	 * TODO: printing of the clock inforamtion of the board is now
 	 * implemented as part of bdinfo command. Currently only support for
diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
index eefb456..149cb99 100644
--- a/arch/arm/lib/bootm.c
+++ b/arch/arm/lib/bootm.c
@@ -222,6 +222,21 @@  static void boot_prep_linux(bootm_headers_t *images)
 /* Subcommand: GO */
 static void boot_jump_linux(bootm_headers_t *images, int flag)
 {
+#ifdef CONFIG_ARMV8
+	void (*kernel_entry)(void *fdt_addr);
+	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+	kernel_entry = (void (*)(void *fdt_addr))images->ep;
+
+	debug("## Transferring control to Linux (at address %lx)...\n",
+	      (ulong) kernel_entry);
+	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+	announce_and_cleanup(fake);
+
+	if (!fake)
+		kernel_entry(images->ft_addr);
+#else
 	unsigned long machid = gd->bd->bi_arch_number;
 	char *s;
 	void (*kernel_entry)(int zero, int arch, uint params);
@@ -236,8 +251,8 @@  static void boot_jump_linux(bootm_headers_t *images, int flag)
 		printf("Using machid 0x%lx from environment\n", machid);
 	}
 
-	debug("## Transferring control to Linux (at address %08lx)" \
-		"...\n", (ulong) kernel_entry);
+	debug("## Transferring control to Linux (at address %08lx)...\n",
+	      (ulong) kernel_entry);
 	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
 	announce_and_cleanup(fake);
 
@@ -248,6 +263,7 @@  static void boot_jump_linux(bootm_headers_t *images, int flag)
 
 	if (!fake)
 		kernel_entry(0, machid, r2);
+#endif
 }
 
 /* Main Entry point for arm bootm implementation
diff --git a/common/image.c b/common/image.c
index 56a5a62..7182549 100644
--- a/common/image.c
+++ b/common/image.c
@@ -81,6 +81,7 @@  static const table_entry_t uimage_arch[] = {
 	{	IH_ARCH_NDS32,		"nds32",	"NDS32",	},
 	{	IH_ARCH_OPENRISC,	"or1k",		"OpenRISC 1000",},
 	{	IH_ARCH_SANDBOX,	"sandbox",	"Sandbox",	},
+	{	IH_ARCH_ARM64,		"arm64",	"AArch64",	},
 	{	-1,			"",		"",		},
 };
 
diff --git a/doc/README.armv8 b/doc/README.armv8
new file mode 100644
index 0000000..d348250
--- /dev/null
+++ b/doc/README.armv8
@@ -0,0 +1,10 @@ 
+Notes:
+
+1. Currently, u-boot can run at EL1 or EL2.
+
+2. GOT is used to relocate u-boot and CONFIG_NEEDS_MANUAL_RELOC is needed.
+
+3. Fdt should be placed in the first 512 megabytes from the start of the kernel image.
+   So, fdt_high should be defined specially. Please refer to linux/Documentation/arm64/booting.txt.
+
+4. Generic board is supported.
diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
index 8fb1765..a58147c 100644
--- a/examples/standalone/stubs.c
+++ b/examples/standalone/stubs.c
@@ -39,6 +39,20 @@  gd_t *global_data;
 "	bctr\n"				\
 	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
 #elif defined(CONFIG_ARM)
+#ifdef CONFIG_ARMV8
+/*
+ * x18 holds the pointer to the global_data, x9 is a call-clobbered
+ * register
+ */
+#define EXPORT_FUNC(x) \
+	asm volatile (			\
+"	.globl " #x "\n"		\
+#x ":\n"				\
+"	ldr	x9, [x18, %0]\n"		\
+"	ldr	x9, [x9, %1]\n"		\
+"	br	x9\n"		\
+	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9");
+#else
 /*
  * r8 holds the pointer to the global_data, ip is a call-clobbered
  * register
@@ -50,6 +64,7 @@  gd_t *global_data;
 "	ldr	ip, [r8, %0]\n"		\
 "	ldr	pc, [ip, %1]\n"		\
 	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
+#endif
 #elif defined(CONFIG_MIPS)
 /*
  * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
diff --git a/include/image.h b/include/image.h
index f93a393..12262d7 100644
--- a/include/image.h
+++ b/include/image.h
@@ -156,6 +156,7 @@  struct lmb;
 #define IH_ARCH_SANDBOX		19	/* Sandbox architecture (test only) */
 #define IH_ARCH_NDS32	        20	/* ANDES Technology - NDS32  */
 #define IH_ARCH_OPENRISC        21	/* OpenRISC 1000  */
+#define IH_ARCH_ARM64		22	/* ARM64	*/
 
 /*
  * Image Types