Patchwork [U-Boot,v5,1/4] core support of arm64

login
register
mail settings
Submitter fenghua@phytium.com.cn
Date Aug. 24, 2013, 1:06 a.m.
Message ID <1377306389-25369-4-git-send-email-fenghua@phytium.com.cn>
Download mbox | patch
Permalink /patch/269600/
State Superseded
Delegated to: Albert ARIBAUD
Headers show

Comments

fenghua@phytium.com.cn - Aug. 24, 2013, 1:06 a.m.
From: David Feng <fenghua@phytium.com.cn>

Signed-off-by: David Feng <fenghua@phytium.com.cn>
---
Changeds for v4:
  - Replace __ARMEB__ with __AARCH64EB__ in byteorder.h and unaligned.h,
    gcc for aarch64 use __AARCH64EB__ and __AARCH64EL__ to identify endian.
  - Some modification to README.armv8

 arch/arm/config.mk                      |    4 +
 arch/arm/cpu/armv8/Makefile             |   56 ++++++
 arch/arm/cpu/armv8/cache.S              |  145 +++++++++++++++
 arch/arm/cpu/armv8/cache_v8.c           |  291 +++++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/config.mk            |   31 ++++
 arch/arm/cpu/armv8/cpu.c                |   68 ++++++++
 arch/arm/cpu/armv8/crt0.S               |  130 ++++++++++++++
 arch/arm/cpu/armv8/exceptions.S         |  182 +++++++++++++++++++
 arch/arm/cpu/armv8/interrupts.c         |  116 ++++++++++++
 arch/arm/cpu/armv8/relocate.S           |   71 ++++++++
 arch/arm/cpu/armv8/start.S              |  200 +++++++++++++++++++++
 arch/arm/cpu/armv8/timer.c              |   95 ++++++++++
 arch/arm/cpu/armv8/tlb.S                |   38 ++++
 arch/arm/cpu/armv8/u-boot.lds           |   83 +++++++++
 arch/arm/include/asm/arch-armv8/armv8.h |   44 +++++
 arch/arm/include/asm/arch-armv8/gpio.h  |   26 +++
 arch/arm/include/asm/arch-armv8/mmu.h   |  117 +++++++++++++
 arch/arm/include/asm/byteorder.h        |   12 ++
 arch/arm/include/asm/config.h           |   10 ++
 arch/arm/include/asm/global_data.h      |    6 +-
 arch/arm/include/asm/io.h               |   12 +-
 arch/arm/include/asm/macro.h            |   26 +++
 arch/arm/include/asm/posix_types.h      |   31 ++++
 arch/arm/include/asm/proc-armv/ptrace.h |   38 ++++
 arch/arm/include/asm/proc-armv/system.h |   58 +++++-
 arch/arm/include/asm/types.h            |   14 ++
 arch/arm/include/asm/u-boot.h           |    4 +
 arch/arm/include/asm/unaligned.h        |   14 ++
 arch/arm/lib/Makefile                   |    8 +
 arch/arm/lib/board.c                    |   18 ++
 arch/arm/lib/bootm.c                    |   16 ++
 common/image.c                          |    1 +
 doc/README.armv8                        |   14 ++
 examples/standalone/stubs.c             |   15 ++
 include/image.h                         |    1 +
 35 files changed, 1987 insertions(+), 8 deletions(-)
 create mode 100644 arch/arm/cpu/armv8/Makefile
 create mode 100644 arch/arm/cpu/armv8/cache.S
 create mode 100644 arch/arm/cpu/armv8/cache_v8.c
 create mode 100644 arch/arm/cpu/armv8/config.mk
 create mode 100644 arch/arm/cpu/armv8/cpu.c
 create mode 100644 arch/arm/cpu/armv8/crt0.S
 create mode 100644 arch/arm/cpu/armv8/exceptions.S
 create mode 100644 arch/arm/cpu/armv8/interrupts.c
 create mode 100644 arch/arm/cpu/armv8/relocate.S
 create mode 100644 arch/arm/cpu/armv8/start.S
 create mode 100644 arch/arm/cpu/armv8/timer.c
 create mode 100644 arch/arm/cpu/armv8/tlb.S
 create mode 100644 arch/arm/cpu/armv8/u-boot.lds
 create mode 100644 arch/arm/include/asm/arch-armv8/armv8.h
 create mode 100644 arch/arm/include/asm/arch-armv8/gpio.h
 create mode 100644 arch/arm/include/asm/arch-armv8/mmu.h
 create mode 100644 doc/README.armv8

Patch

diff --git a/arch/arm/config.mk b/arch/arm/config.mk
index ce3903b..f1c6a7b 100644
--- a/arch/arm/config.mk
+++ b/arch/arm/config.mk
@@ -74,7 +74,9 @@  endif
 endif
 
 # needed for relocation
+ifndef CONFIG_ARMV8
 LDFLAGS_u-boot += -pie
+endif
 
 #
 # FIXME: binutils versions < 2.22 have a bug in the assembler where
@@ -95,6 +97,8 @@  endif
 endif
 
 # check that only R_ARM_RELATIVE relocations are generated
+ifndef CONFIG_ARMV8
 ifneq ($(CONFIG_SPL_BUILD),y)
 ALL-y	+= checkarmreloc
 endif
+endif
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
new file mode 100644
index 0000000..55fd365
--- /dev/null
+++ b/arch/arm/cpu/armv8/Makefile
@@ -0,0 +1,56 @@ 
+#
+# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundatio; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+
+include $(TOPDIR)/config.mk
+
+LIB	= $(obj)lib$(CPU).o
+
+START	:= start.o
+
+COBJS	+= cpu.o
+COBJS	+= timer.o
+COBJS	+= cache_v8.o
+COBJS	+= interrupts.o
+
+SOBJS	+= crt0.o
+SOBJS	+= relocate.o
+SOBJS	+= exceptions.o
+SOBJS	+= cache.o
+SOBJS	+= tlb.o
+
+SRCS	:= $(START:.o=.S) $(COBJS:.o=.c)
+OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
+START	:= $(addprefix $(obj),$(START))
+
+all:	$(obj).depend $(START) $(LIB)
+
+$(LIB):	$(OBJS)
+	$(call cmd_link_o_target, $(OBJS))
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
new file mode 100644
index 0000000..b1a95ec
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache.S
@@ -0,0 +1,145 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * void __asm_flush_dcache_level(level)
+ *
+ * clean and invalidate one level cache.
+ *
+ * x0: cache level
+ * x1~x9: clobbered
+ */
+ENTRY(__asm_flush_dcache_level)
+	lsl	x1, x0, #1
+	msr	csselr_el1, x1			/* select cache level */
+	isb					/* isb to sych the new cssr&csidr */
+	mrs	x6, ccsidr_el1			/* read the new ccsidr */
+	and	x2, x6, #7			/* x2 = length of the cache lines */
+	add	x2, x2, #4			/* add 4 (line length offset) */
+	mov	x3, #0x3ff
+	and	x3, x3, x6, lsr #3		/* x3 = maximum number of the way size */
+	clz	w5, w3				/* bit position of way size */
+	mov	x4, #0x7fff
+	and	x4, x4, x1, lsr #13		/* x4 = max number of the set size */
+	/* x1 = cache level << 1 */
+	/* x2 = line length offset */
+	/* x3 = number of cache ways */
+	/* x4 = number of cache sets */
+	/* x5 = bit position of way size */
+
+loop_set:
+	mov	x6, x3				/* create working copy of way size */
+loop_way:
+	lsl	x7, x6, x5
+	orr	x9, x0, x7			/* map way and level to cisw value */
+	lsl	x7, x4, x2
+	orr	x9, x9, x7			/* map set number to cisw value */
+	dc	cisw, x9			/* clean & invalidate by set/way */
+	subs	x6, x6, #1			/* decrement the way */
+	b.ge	loop_way
+	subs	x4, x4, #1			/* decrement the set */
+	b.ge	loop_set
+
+	ret
+ENDPROC(__asm_flush_dcache_level)
+
+/*
+ * void __asm_flush_dcache_all(void)
+ *
+ * clean and invalidate all data cache by SET/WAY.
+ */
+ENTRY(__asm_flush_dcache_all)
+	dsb	sy
+	mov	x15, lr
+	mrs	x10, clidr_el1			/* read clidr */
+	lsr	x11, x10, #24
+	and	x11, x11, #0x7			/* x11 = loc */
+	cbz	x11, finished			/* if loc = 0, no need to clean */
+	mov	x0, #0				/* start flush at cache level 0 */
+	/* x0 = level */
+	/* x10 = clidr_el1 */
+	/* x11 = loc */
+
+loop_level:
+	lsl	x1, x0, #1
+	add	x1, x1, x0			/* x0 = 3x cache level */
+	lsr	x1, x10, x1
+	and	x1, x1, #7			/* x1 = cache type */
+	cmp	x1, #2
+	b.lt	skip				/* skip if no cache or icache */
+	bl	__asm_flush_dcache_level	/* flush dcache of level */
+skip:
+	add	x0, x0, #1			/* increment cache level */
+	cmp	x11, x0
+	b.gt	loop_level
+
+finished:
+	mov	x0, #0
+	msr	csselr_el1, x0			/* swith back to cache level 0 */
+	dsb	sy
+	isb
+	mov	lr, x15
+	ret
+ENDPROC(__asm_flush_dcache_all)
+
+/*
+ * void __asm_flush_dcache_range(start, end)
+ *
+ * clean & invalidate data cache in the range
+ *
+ * x0: start address
+ * x1: end address
+ */
+ENTRY(__asm_flush_dcache_range)
+	mrs	x3, ctr_el0			/* read CTR */
+	lsr	x3, x3, #16
+	and	x3, x3, #0xf			/* cache line size encoding */
+	mov	x2, #4				/* bytes per word */
+	lsl	x2, x2, x3			/* actual cache line size */
+
+	/* x2 = minimal cache line size in cache system */
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:      dc	civac, x0			/* clean & invalidate D line / unified line */
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__asm_flush_dcache_range)
+
+/*
+ * void __asm_invalidate_icache_all(void)
+ *
+ * invalidate all tlb entries.
+ */
+ENTRY(__asm_invalidate_icache_all)
+	ic	ialluis
+	isb	sy
+	ret
+ENDPROC(__asm_invalidate_icache_all)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
new file mode 100644
index 0000000..be2b49f
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -0,0 +1,291 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+#include <asm/arch/armv8.h>
+#include <asm/arch/mmu.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+extern void __asm_flush_dcache_all(void);
+extern void __asm_flush_dcache_range(u64 start, u64 end);
+extern void __asm_invalidate_tlb_all(void);
+extern void __asm_invalidate_icache_all(void);
+
+static inline unsigned int get_sctlr(void)
+{
+	unsigned int val;
+	asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
+	return val;
+}
+
+static inline void set_sctlr(unsigned int val)
+{
+	asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
+	asm volatile("isb");
+}
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+
+static void set_pgtable_section(u64 section, u64 memory_type)
+{
+	u64 *page_table = (u64 *)gd->arch.tlb_addr;
+	u64 value;
+
+	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
+	value |= PMD_ATTRINDX(memory_type);
+	page_table[section] = value;
+}
+
+/* to activate the MMU we need to set up virtual memory */
+static inline void mmu_setup(void)
+{
+	int i, j;
+	bd_t *bd = gd->bd;
+	static int table_initialized = 0;
+
+	if (!table_initialized) {
+		/* Setup an identity-mapping for all spaces */
+		for (i = 0; i < (PAGE_SIZE >> 3); i++)
+			set_pgtable_section(i, MT_DEVICE_nGnRnE);
+
+		/* Setup an identity-mapping for all RAM space */
+		for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+			debug("%s: bank: %d\n", __func__, i);
+			for (j = bd->bi_dram[i].start >> SECTION_SHIFT;
+				 j < (bd->bi_dram[i].start + bd->bi_dram[i].size) >> SECTION_SHIFT;
+				 j++) {
+				set_pgtable_section(i, MT_NORMAL);
+			}
+		}
+
+		/* load TTBR0 */
+		asm volatile("msr ttbr0_el2, %0" : : "r" (gd->arch.tlb_addr) : "memory");
+
+		table_initialized = 1;
+	}
+
+	/* and enable the mmu */
+	set_sctlr(get_sctlr() | CR_M);
+}
+
+/*
+ * Performs a invalidation of the entire data cache
+ * at all levels
+ */
+void invalidate_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+	v8_outer_cache_inval_all();
+}
+
+/*
+ * Performs a clean & invalidation of the entire data cache
+ * at all levels
+ */
+void flush_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+	v8_outer_cache_flush_all();
+}
+
+/*
+ * Invalidates range in all levels of D-cache/unified cache used:
+ * Affects the range [start, stop - 1]
+ */
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+	v8_outer_cache_inval_range(start, stop);
+}
+
+/*
+ * Flush range(clean & invalidate) from all levels of D-cache/unified
+ * cache used:
+ * Affects the range [start, stop - 1]
+ */
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+	v8_outer_cache_flush_range(start, stop);
+}
+
+void dcache_enable(void)
+{
+	uint32_t sctlr;
+
+	sctlr = get_sctlr();
+
+	/* The data cache is not active unless the mmu is enabled too */
+	if (!(sctlr & CR_M)) {
+		v8_outer_cache_enable();
+		invalidate_dcache_all();
+		__asm_invalidate_tlb_all();
+
+		mmu_setup();
+	}
+
+	set_sctlr(sctlr | CR_C);
+}
+
+void dcache_disable(void)
+{
+	uint32_t sctlr;
+
+	sctlr = get_sctlr();
+
+	/* if cache isn't enabled no need to disable */
+	if (!(sctlr & CR_C))
+		return;
+
+	set_sctlr(sctlr & ~(CR_C|CR_M));
+
+	flush_dcache_all();
+	__asm_invalidate_tlb_all();
+}
+
+int dcache_status(void)
+{
+	return (get_sctlr() & CR_C) != 0;
+}
+
+#else	/* CONFIG_SYS_DCACHE_OFF */
+
+void invalidate_dcache_all(void)
+{
+}
+
+void flush_dcache_all(void)
+{
+}
+
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void dcache_enable(void)
+{
+}
+
+void dcache_disable(void)
+{
+}
+
+int dcache_status(void)
+{
+	return 0;
+}
+
+#endif	/* CONFIG_SYS_DCACHE_OFF */
+
+#ifndef CONFIG_SYS_ICACHE_OFF
+
+void icache_enable(void)
+{
+	set_sctlr(get_sctlr() | CR_I);
+}
+
+void icache_disable(void)
+{
+	set_sctlr(get_sctlr() & ~CR_I);
+}
+
+int icache_status(void)
+{
+	return (get_sctlr() & CR_I) != 0;
+}
+
+void invalidate_icache_all(void)
+{
+	__asm_invalidate_icache_all();
+}
+
+#else	/* CONFIG_SYS_ICACHE_OFF */
+
+void icache_enable(void)
+{
+}
+
+void icache_disable(void)
+{
+}
+
+int icache_status(void)
+{
+	return 0;
+}
+
+void invalidate_icache_all(void)
+{
+}
+
+#endif	/* CONFIG_SYS_ICACHE_OFF */
+
+/*
+ * Enable dCache & iCache, whether cache is actually enabled
+ * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
+ */
+void enable_caches(void)
+{
+	icache_enable();
+	dcache_enable();
+}
+
+/*
+ * Flush range from all levels of d-cache/unified-cache used:
+ * Affects the range [start, start + size - 1]
+ */
+void flush_cache(unsigned long start, unsigned long size)
+{
+	flush_dcache_range(start, start + size);
+}
+
+/*
+ * Stub implementations for outer cache operations
+ */
+void __v8_outer_cache_enable(void) {}
+void v8_outer_cache_enable(void)
+	__attribute__((weak, alias("__v8_outer_cache_enable")));
+
+void __v8_outer_cache_disable(void) {}
+void v8_outer_cache_disable(void)
+	__attribute__((weak, alias("__v8_outer_cache_disable")));
+
+void __v8_outer_cache_flush_all(void) {}
+void v8_outer_cache_flush_all(void)
+	__attribute__((weak, alias("__v8_outer_cache_flush_all")));
+
+void __v8_outer_cache_inval_all(void) {}
+void v8_outer_cache_inval_all(void)
+	__attribute__((weak, alias("__v8_outer_cache_inval_all")));
+
+void __v8_outer_cache_flush_range(u64 start, u64 end) {}
+void v8_outer_cache_flush_range(u64 start, u64 end)
+	__attribute__((weak, alias("__v8_outer_cache_flush_range")));
+
+void __v8_outer_cache_inval_range(u64 start, u64 end) {}
+void v8_outer_cache_inval_range(u64 start, u64 end)
+	__attribute__((weak, alias("__v8_outer_cache_inval_range")));
diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
new file mode 100644
index 0000000..aae2170
--- /dev/null
+++ b/arch/arm/cpu/armv8/config.mk
@@ -0,0 +1,31 @@ 
+#
+# Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+PLATFORM_RELFLAGS += -fno-common -ffixed-x18
+
+# SEE README.arm-unaligned-accesses
+PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
+PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
+
+PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
+PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
+PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
+PLATFORM_CPPFLAGS += -fpic
diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
new file mode 100644
index 0000000..5db7505
--- /dev/null
+++ b/arch/arm/cpu/armv8/cpu.c
@@ -0,0 +1,68 @@ 
+/*
+ * (C) Copyright 2008 Texas Insturments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroeger@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * CPU specific code
+ */
+
+#include <common.h>
+#include <command.h>
+#include <asm/arch/armv8.h>
+#include <linux/compiler.h>
+
+void __weak cpu_cache_initialization(void){}
+
+int cleanup_before_linux(void)
+{
+	/*
+	 * this function is called just before we call linux
+	 * it prepares the processor for linux
+	 *
+	 * we turn off caches etc ...
+	 */
+#ifndef CONFIG_SPL_BUILD
+	disable_interrupts();
+#endif
+
+	/*
+	 * Turn off I-cache and invalidate it
+	 */
+	icache_disable();
+	invalidate_icache_all();
+
+	/*
+	 * turn off D-cache
+	 * dcache_disable() in turn flushes the d-cache and disables MMU
+	 */
+	dcache_disable();
+	v8_outer_cache_disable();
+
+	/*
+	 * After D-cache is flushed and before it is disabled there may
+	 * be some new valid entries brought into the cache. We are sure
+	 * that these lines are not dirty and will not affect our execution.
+	 * (because unwinding the call-stack and setting a bit in CP15 SCTRL
+	 * is all we did during this. We have not pushed anything on to the
+	 * stack. Neither have we affected any static data)
+	 * So just invalidate the entire d-cache again to avoid coherency
+	 * problems for kernel
+	 */
+	invalidate_dcache_all();
+
+	/*
+	 * Some CPU need more cache attention before starting the kernel.
+	 */
+	cpu_cache_initialization();
+
+	return 0;
+}
diff --git a/arch/arm/cpu/armv8/crt0.S b/arch/arm/cpu/armv8/crt0.S
new file mode 100644
index 0000000..fe39495
--- /dev/null
+++ b/arch/arm/cpu/armv8/crt0.S
@@ -0,0 +1,130 @@ 
+/*
+ * crt0 - C-runtime startup Code for ARM64 U-Boot
+ *
+ * Copyright (c) 2013  David Feng <fenghua@phytium.com.cn>
+ *
+ * Bsed on arm/lib/crt0.S by Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include <asm-offsets.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * This file handles the target-independent stages of the U-Boot
+ * start-up where a C runtime environment is needed. Its entry point
+ * is _main and is branched into from the target's start.S file.
+ *
+ * _main execution sequence is:
+ *
+ * 1. Set up initial environment for calling board_init_f().
+ *    This environment only provides a stack and a place to store
+ *    the GD ('global data') structure, both located in some readily
+ *    available RAM (SRAM, locked cache...). In this context, VARIABLE
+ *    global data, initialized or not (BSS), are UNAVAILABLE; only
+ *    CONSTANT initialized data are available.
+ *
+ * 2. Call board_init_f(). This function prepares the hardware for
+ *    execution from system RAM (DRAM, DDR...) As system RAM may not
+ *    be available yet, , board_init_f() must use the current GD to
+ *    store any data which must be passed on to later stages. These
+ *    data include the relocation destination, the future stack, and
+ *    the future GD location.
+ *
+ * (the following applies only to non-SPL builds)
+ *
+ * 3. Set up intermediate environment where the stack and GD are the
+ *    ones allocated by board_init_f() in system RAM, but BSS and
+ *    initialized non-const data are still not available.
+ *
+ * 4. Call relocate_code(). This function relocates U-Boot from its
+ *    current location into the relocation destination computed by
+ *    board_init_f().
+ *
+ * 5. Set up final environment for calling board_init_r(). This
+ *    environment has BSS (initialized to 0), initialized non-const
+ *    data (initialized to their intended value), and stack in system
+ *    RAM. GD has retained values set by board_init_f(). Some CPUs
+ *    have some work left to do at this point regarding memory, so
+ *    call c_runtime_cpu_setup.
+ *
+ * 6. Branch to board_init_r().
+ */
+
+ENTRY(_main)
+
+/*
+ * Set up initial C runtime environment and call board_init_f(0).
+ */
+	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
+	sub	x0, x0, #GD_SIZE	/* allocate one GD above SP */
+	bic	sp, x0, #0xf		/* 16-byte alignment for ABI compliance */
+	mov	x18, sp			/* GD is above SP */
+	mov	x0, #0
+	bl	board_init_f
+
+/*
+ * Set up intermediate environment (new sp and gd) and call
+ * relocate_code(addr_moni). Trick here is that we'll return
+ * 'here' but relocated.
+ */
+	ldr	x0, [x18, #GD_START_ADDR_SP]	/* x0 = gd->start_addr_sp */
+	bic	sp, x0, #0xf			/* 16-byte alignment for ABI compliance */
+	ldr	x18, [x18, #GD_BD]		/* x18 = gd->bd */
+	sub	x18, x18, #GD_SIZE		/* new GD is below bd */
+
+	adr	lr, relocation_return
+	ldr	x9, [x18, #GD_RELOC_OFF]	/* x0 = gd->reloc_off */
+	add	lr, lr, x9			/* new return address after relocation */
+	ldr	x0, [x18, #GD_RELOCADDR]	/* x0 = gd->relocaddr */
+	b	relocate_code
+
+relocation_return:
+
+/*
+ * Set up final (full) environment
+ */
+	bl	c_runtime_cpu_setup		/* we still call old routine here */
+
+/*
+ * Clear BSS section
+ */
+	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 = gd->reloc_off */
+	ldr	x0, =__bss_start		/* x0 = __bss_start in FLASH */
+	add	x0, x0, x9			/* x0 = __bss_start in RAM */
+	ldr	x1, =__bss_end			/* x1 = __bss_end in FLASH */
+	add	x1, x1, x9			/* x1 = __bss_end in RAM */
+	mov	x2, #0
+clear_loop:
+	str	x2, [x0]
+	add	x0, x0, #8
+	cmp	x0, x1
+	b.lo	clear_loop
+
+	/* call board_init_r(gd_t *id, ulong dest_addr) */
+	mov	x0, x18				/* gd_t */
+	ldr	x1, [x18, #GD_RELOCADDR]	/* dest_addr */
+	b	board_init_r			/* PC relative jump */
+
+	/* NOTREACHED - board_init_r() does not return */
+
+ENDPROC(_main)
diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
new file mode 100644
index 0000000..c64a326
--- /dev/null
+++ b/arch/arm/cpu/armv8/exceptions.S
@@ -0,0 +1,182 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/ptrace.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * Exception vectors.
+ */
+	.align	11
+ENTRY(vectors)
+	ventry	el_current_sync_invalid		/* Current EL Synchronous Thread */
+	ventry	el_current_irq_invalid		/* Current EL IRQ Thread */
+	ventry	el_current_fiq_invalid		/* Current EL FIQ Thread */
+	ventry	el_current_error_invalid	/* Current EL Error Thread */
+
+	ventry	el_current_sync_invalid		/* Current EL Synchronous Handler */
+	ventry	el_current_irq_invalid		/* Current EL IRQ Handler */
+	ventry	el_current_fiq_invalid		/* Current EL FIQ Handler */
+	ventry	el_current_error_invalid	/* Current EL Error Handler */
+
+	ventry	el_lower_sync_invalid		/* Lower EL Synchronous 64-bit Handler */
+	ventry	el_lower_irq_invalid		/* Lower EL IRQ 64-bit Handler */
+	ventry	el_lower_fiq_invalid		/* Lower EL FIQ 64-bit Handler */
+	ventry	el_lower_error_invalid		/* Lower EL Error 64-bit Handler */
+
+	ventry	el_lower_sync_invalid		/* Lower EL Synchronous 32-bit Handler */
+	ventry	el_lower_irq_invalid		/* Lower EL IRQ 32-bit Handler */
+	ventry	el_lower_fiq_invalid		/* Lower EL FIQ 32-bit Handler */
+	ventry	el_lower_error_invalid		/* Lower EL Error 32-bit Handler */
+END(vectors)
+
+/*
+ * Enter Exception.
+ * This will save the processor state that is X0~X29/LR/SP/ELR/PSTATE
+ * to the stack frame.
+ */
+.macro	exception_entry, el
+	sub	sp, sp, #S_FRAME_SIZE - S_LR	/* LR,SP,ELR,PSTATE will be saved later */
+	push	x28, x29
+	push	x26, x27
+	push	x24, x25
+	push	x22, x23
+	push	x20, x21
+	push	x18, x19
+	push	x16, x17
+	push	x14, x15
+	push	x12, x13
+	push	x10, x11
+	push	x8, x9
+	push	x6, x7
+	push	x4, x5
+	push	x2, x3
+	push	x0, x1
+	.if	\el == 2
+		add	x21, sp, #S_FRAME_SIZE
+	.else
+		mrs	x21, sp_el1
+	.endif
+	mrs	x22, elr_el2
+	mrs	x23, spsr_el2
+	stp	lr, x21, [sp, #S_LR]
+	stp	x22, x23, [sp, #S_PC]
+.endm
+
+/*
+ * Exit Exception.
+ * This will restore the processor state that is X0~X29/LR/SP/ELR/PSTATE
+ * from the stack frame and return from exceprion.
+ */
+.macro	exception_exit, el
+	ldp	x21, x22, [sp, #S_PC]
+	msr	elr_el2, x21			/* restore elr register */
+	msr	spsr_el2, x22			/* restore spsr register */
+	.if	\el == 1
+		ldr	x23, [sp, #S_SP]
+		msr	sp_el1, x23		/* restore return stack pointer */
+	.endif
+	pop	x0, x1				/* restore the general registers */
+	pop	x2, x3
+	pop	x4, x5
+	pop	x6, x7
+	pop	x8, x9
+	pop	x10, x11
+	pop	x12, x13
+	pop	x14, x15
+	pop	x16, x17
+	pop	x18, x19
+	pop	x20, x21
+	pop	x22, x23
+	pop	x24, x25
+	pop	x26, x27
+	pop	x28, x29
+	ldr	lr, [sp], #S_FRAME_SIZE - S_LR	/* restore LR and SP */
+	eret					/* exception return */
+.endm
+
+/*
+ * Current EL exception entry
+ */
+ENTRY(el_current_sync_invalid)
+	exception_entry 2
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_sync
+ENDPROC(el_current_sync_invalid)
+
+ENTRY(el_current_irq_invalid)
+	exception_entry 2
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_irq
+ENDPROC(el_current_irq_invalid)
+
+ENTRY(el_current_fiq_invalid)
+	exception_entry 2
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_fiq
+ENDPROC(el_current_fiq_invalid)
+
+ENTRY(el_current_error_invalid)
+	exception_entry 2
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_error
+ENDPROC(el_current_error_invalid)
+
+
+/*
+ * Lower EL exception entry
+ */
+ENTRY(el_lower_sync_invalid)
+	exception_entry 1
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_sync
+ENDPROC(el_lower_sync_invalid)
+
+ENTRY(el_lower_irq_invalid)
+	exception_entry 1
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_irq
+ENDPROC(el_lower_irq_invalid)
+
+ENTRY(el_lower_fiq_invalid)
+	exception_entry 1
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_fiq
+ENDPROC(el_lower_fiq_invalid)
+
+ENTRY(el_lower_error_invalid)
+	exception_entry 1
+	mov	x0, sp
+	mrs	x1, esr_el2
+	b	do_bad_error
+ENDPROC(el_lower_error_invalid)
diff --git a/arch/arm/cpu/armv8/interrupts.c b/arch/arm/cpu/armv8/interrupts.c
new file mode 100644
index 0000000..b4d2368
--- /dev/null
+++ b/arch/arm/cpu/armv8/interrupts.c
@@ -0,0 +1,116 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+
+
+#ifdef CONFIG_USE_IRQ
+int interrupt_init (void)
+{
+	return 0;
+}
+
+/* enable IRQ interrupts */
+void enable_interrupts (void)
+{
+}
+
+/*
+ * disable IRQ/FIQ interrupts
+ * returns true if interrupts had been enabled before we disabled them
+ */
+int disable_interrupts (void)
+{
+	return 0;
+}
+#else
+int interrupt_init (void)
+{
+	return 0;
+}
+
+void enable_interrupts (void)
+{
+	return;
+}
+int disable_interrupts (void)
+{
+	return 0;
+}
+#endif /* CONFIG_USE_IRQ */
+
+
+void show_regs (struct pt_regs *regs)
+{
+	int i;
+
+	printf("PC is at %lx\n", regs->pc);
+	printf("LR is at %lx\n", regs->regs[30]);
+	printf("PSTATE: %08lx\n", regs->pstate);
+	printf("SP : %lx\n", regs->sp);
+	for (i = 29; i >= 0; i--) {
+		printf("x%-2d: %016lx ", i, regs->regs[i]);
+		if (i % 2 == 0)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+/*
+ * do_bad_sync handles the impossible case in the Synchronous Abort vector.
+ */
+void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Synchronous Abort\" handler detected, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_irq handles the impossible case in the Irq vector.
+ */
+void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Irq\" handler detected, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_fiq handles the impossible case in the Fiq vector.
+ */
+void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Fiq\" handler detected, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_error handles the impossible case in the Error vector.
+ */
+void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
+{
+	printf("Bad mode in \"Error\" handler detected, esr 0x%08x\n", esr);
+	show_regs(pt_regs);
+	panic("Resetting CPU ...\n");
+}
diff --git a/arch/arm/cpu/armv8/relocate.S b/arch/arm/cpu/armv8/relocate.S
new file mode 100644
index 0000000..87b5ef0
--- /dev/null
+++ b/arch/arm/cpu/armv8/relocate.S
@@ -0,0 +1,71 @@ 
+/*
+ * relocate - common relocation function for ARM64 U-Boot
+ *
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <linux/linkage.h>
+
+/*
+ * void relocate_code (addr_moni)
+ *
+ * This function relocates the monitor code.
+ *
+ * NOTE:
+ * GOT is used and configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
+ */
+ENTRY(relocate_code)
+	/*
+	 * Copy u-boot from flash to RAM
+	 */
+	ldr	x1, =__image_copy_start	/* x1 <- copy source */
+	cmp	x1, x0
+	b.eq	relocate_done		/* skip relocation */
+	mov	x2, x0			/* x2 <- copy destination */
+	ldr	x3, =__image_copy_end	/* x3 <- source end address */
+
+copy_loop:
+	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
+	stp	x10, x11, [x2], #16	/* copy to   target address [x2] */
+	cmp	x1, x3			/* until source end address [x3] */
+	b.lo	copy_loop
+
+	/*
+	 * Fix .reloc relocations
+	 */
+	ldr	x9, [x18, #GD_RELOC_OFF]/* x9 <- relocation offset */
+	ldr	x1, =__rel_got_start	/* x1 <- rel got start ofs */
+	add	x1, x1, x9		/* x1 <- rel got start in RAM */
+	ldr	x2, =__rel_got_end	/* x2 <- rel got end ofs */
+	add	x2, x2, x9		/* x2 <- rel got end in RAM */
+fixloop:
+	ldr	x10, [x1]		/* x10 <- address to be fixed up */
+	add	x10, x10, x9		/* x10 <- address to be fixed up in RAM*/
+	str	x10, [x1]
+	add	x1, x1, #8		/* each gotn entry is 8 bytes */
+	cmp	x1, x2
+	b.lo	fixloop
+
+relocate_done:
+	ret
+ENDPROC(relocate_code)
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
new file mode 100644
index 0000000..328669a
--- /dev/null
+++ b/arch/arm/cpu/armv8/start.S
@@ -0,0 +1,200 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/arch/mmu.h>
+
+/*************************************************************************
+ *
+ * Startup Code (reset vector)
+ *
+ *************************************************************************/
+
+.globl _start
+_start:
+	b	reset
+
+	.align 3
+
+.globl _TEXT_BASE
+_TEXT_BASE:
+	.quad	CONFIG_SYS_TEXT_BASE
+
+/*
+ * These are defined in the linker script.
+ */
+.globl	_end_ofs
+_end_ofs:
+	.quad	_end - _start
+
+.globl	_bss_start_ofs
+_bss_start_ofs:
+	.quad	__bss_start - _start
+
+.globl	_bss_end_ofs
+_bss_end_ofs:
+	.quad	__bss_end - _start
+
+reset:
+	/*
+	 * EL3 initialisation
+	 */
+	mrs	x0, CurrentEL
+	cmp	x0, #0xc			/* EL3? */
+	b.ne	reset_nonsecure			/* skip EL3 initialisation */
+
+	mov	x0, #0x30			/* RES1 */
+	orr	x0, x0, #(1 << 0)		/* Non-secure EL1 */
+	orr	x0, x0, #(1 << 8)		/* HVC enable */
+	orr	x0, x0, #(1 << 10)		/* 64-bit EL2 */
+	msr	scr_el3, x0
+
+	msr	cptr_el3, xzr			/* Disable copro. traps to EL3 */
+
+	/* Counter frequency initialisation */
+	ldr	x0, =CONFIG_SYS_CNTFRQ
+	msr	cntfrq_el0, x0
+
+	/* GIC initialisation */
+	mrs	x0, mpidr_el1
+	tst	x0, #15
+	b.ne	1f				/* secondary CPU */
+
+	ldr	x1, =GIC_DIST_BASE		/* GICD_CTLR */
+	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
+	str	w0, [x1]
+
+1:	ldr	x1, =GIC_DIST_BASE + 0x80	/* GICD_IGROUPR */
+	mov	w0, #~0				/* Grp1 interrupts */
+	str	w0, [x1], #4
+	b.ne	2f				/* Only local interrupts for secondary CPUs */
+	str	w0, [x1], #4
+	str	w0, [x1], #4
+
+2:	ldr	x1, =GIC_CPU_BASE		/* GICC_CTLR */
+	ldr	w0, [x1]
+	mov	w0, #3				/* EnableGrp0 | EnableGrp1 */
+	str	w0, [x1]
+
+	mov	w0, #1 << 7			/* allow NS access to GICC_PMR */
+	str	w0, [x1, #4]			/* GICC_PMR */
+
+	/* SCTLR_EL2 initialisation */
+	msr	sctlr_el2, xzr
+
+	/* Return to the EL2_SP1 mode from EL3 */
+	adr	x0, reset_nonsecure
+	mov	x1, #0x3c9			/* EL2_SP1 | D | A | I | F */
+	msr	elr_el3, x0
+	msr	spsr_el3, x1
+	eret
+
+	/*
+	 * EL2 initialisation:
+	 * MMU Disabled, iCache Disabled, dCache Disabled
+	 */
+reset_nonsecure:
+	/* SCTLR_EL2 initialisation */
+	mov	x0, #0x2			/* Alignment check enable */
+	msr	sctlr_el2, x0
+
+	/* Initialize vBAR */
+	adr	x0, vectors
+	msr	vbar_el2, x0
+
+	/* Cache/BPB/TLB Invalidate */
+	bl	__asm_flush_dcache_all		/* dCache invalidate */
+	bl	__asm_invalidate_icache_all	/* iCache invalidate */
+	bl	__asm_invalidate_tlb_all	/* invalidate I + D TLBs */
+
+	/* Processor specific initialisation */
+#ifndef CONFIG_SKIP_LOWLEVEL_INIT
+	bl	lowlevel_init
+#endif
+
+	mrs	x0, mpidr_el1
+	tst	x0, #15
+	b.eq	master_cpu
+
+	/*
+	 * Secondary CPUs
+	 */
+slave_cpu:
+
+	wfe
+	ldr	x1, =SECONDARY_CPU_MAILBOX
+	ldr	x0, [x1]
+	cbz	x0, slave_cpu
+	br	x0				/* branch to the given address */
+
+	/*
+	 * Primary CPU
+	 */
+master_cpu:
+
+	bl	_main
+
+/*------------------------------------------------------------------------------*/
+
+ENTRY(c_runtime_cpu_setup)
+	/* If I-cache is enabled invalidate it */
+#ifndef CONFIG_SYS_ICACHE_OFF
+	ic	iallu			/* I+BTB cache invalidate */
+	isb	sy
+#endif
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+	/*
+	 * Memory region attributes:
+	 *
+	 *   n = AttrIndx[2:0]
+	 *                      n       MAIR
+	 *   DEVICE_nGnRnE      000     00000000
+	 *   DEVICE_nGnRE       001     00000100
+	 *   DEVICE_GRE         010     00001100
+	 *   NORMAL_NC          011     01000100
+	 *   NORMAL             100     11111111
+	 */
+	ldr	x0, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
+		     MAIR(0x04, MT_DEVICE_nGnRE) | \
+		     MAIR(0x0c, MT_DEVICE_GRE) | \
+		     MAIR(0x44, MT_NORMAL_NC) | \
+		     MAIR(0xff, MT_NORMAL)
+	msr     mair_el2, x0
+
+	/*
+	 * Set/prepare TCR and TTBR. Using 512GB (39-bit) address range.
+	 */
+	ldr     x0, =TCR_T0SZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT
+	orr     x0, x0, TCR_TG0_64K
+	msr     tcr_el2, x0
+#endif
+
+	/* Relocate vBAR */
+	adr	x0, vectors
+	msr	vbar_el2, x0
+
+	ret
+ENDPROC(c_runtime_cpu_setup)
diff --git a/arch/arm/cpu/armv8/timer.c b/arch/arm/cpu/armv8/timer.c
new file mode 100644
index 0000000..8c0cfcb
--- /dev/null
+++ b/arch/arm/cpu/armv8/timer.c
@@ -0,0 +1,95 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+#include <div64.h>
+
+/*
+ * Genertic Timer implementation of __udelay/get_timer/get_ticks/get_tbclk
+ * functions. If any other timers used, another implementation should be
+ * placed in platform code.
+ */
+
+static inline unsigned long tick_to_time(unsigned long tick)
+{
+	tick *= CONFIG_SYS_HZ;
+	do_div(tick, CONFIG_SYS_CNTFRQ);
+	return tick;
+}
+
+static inline unsigned long time_to_tick(unsigned long time)
+{
+	time *= CONFIG_SYS_CNTFRQ;
+	do_div(time, CONFIG_SYS_HZ);
+	return time;
+}
+
+/*
+ * Generic timer implementation of get_tbclk()
+ */
+ulong __get_tbclk (void)
+{
+	return CONFIG_SYS_HZ;
+}
+ulong get_tbclk(void)
+	__attribute__((weak, alias("__get_tbclk")));
+
+/*
+ * Generic timer implementation of get_timer()
+ */
+ulong __get_timer(ulong base)
+{
+	u64 cval;
+
+	isb();
+	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
+
+	tick_to_time(cval);
+
+	return tick_to_time(cval) - base;
+}
+ulong get_timer(ulong base)
+	__attribute__((weak, alias("__get_timer")));
+
+/*
+ * Generic timer implementation of get_ticks()
+ */
+unsigned long long __get_ticks(void)
+{
+	return get_timer(0);
+}
+unsigned long long get_ticks(void)
+	__attribute__((weak, alias("__get_ticks")));
+
+/*
+ * Generic timer implementation of __udelay()
+ */
+void ___udelay(ulong usec)
+{
+	unsigned long tmp;
+
+	tmp = get_ticks() + usec/1000;
+
+	while (get_ticks() < tmp);
+}
+void __udelay(ulong usec)
+	__attribute__((weak, alias("___udelay")));
diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
new file mode 100644
index 0000000..f0b7dbc
--- /dev/null
+++ b/arch/arm/cpu/armv8/tlb.S
@@ -0,0 +1,38 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+
+/*
+ * void __asm_invalidate_tlb_all(void)
+ *
+ * invalidate all tlb entries.
+ */
+ENTRY(__asm_invalidate_tlb_all)
+	tlbi	alle2
+	dsb		sy
+	isb
+	ret
+ENDPROC(__asm_invalidate_tlb_all)
diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
new file mode 100644
index 0000000..14842e3
--- /dev/null
+++ b/arch/arm/cpu/armv8/u-boot.lds
@@ -0,0 +1,83 @@ 
+/*
+ * Copyright (c) 2013	FengHua <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+ENTRY(_start)
+SECTIONS
+{
+	. = 0x00000000;
+
+	. = ALIGN(8);
+	.text :
+	{
+		*(.__image_copy_start)
+		CPUDIR/start.o (.text*)
+		*(.text*)
+	}
+
+	. = ALIGN(8);
+	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
+
+	. = ALIGN(8);
+	.data : {
+		*(.data*)
+	}
+
+	. = ALIGN(8);
+
+	. = .;
+
+	. = ALIGN(8);
+	.u_boot_list : {
+		KEEP(*(SORT(.u_boot_list*)));
+	}
+
+	. = ALIGN(8);
+	.reloc : {
+		__rel_got_start = .;
+		*(.got)
+		__rel_got_end = .;
+	}
+
+	.image_copy_end :
+	{
+		*(.__image_copy_end)
+	}
+
+	_end = .;
+
+	. = ALIGN(8);
+	.bss : {
+		__bss_start = .;
+		*(.bss*)
+		 . = ALIGN(8);
+		__bss_end = .;
+	}
+
+	/DISCARD/ : { *(.dynsym) }
+	/DISCARD/ : { *(.dynstr*) }
+	/DISCARD/ : { *(.dynamic*) }
+	/DISCARD/ : { *(.plt*) }
+	/DISCARD/ : { *(.interp*) }
+	/DISCARD/ : { *(.gnu*) }
+}
diff --git a/arch/arm/include/asm/arch-armv8/armv8.h b/arch/arm/include/asm/arch-armv8/armv8.h
new file mode 100644
index 0000000..8506125
--- /dev/null
+++ b/arch/arm/include/asm/arch-armv8/armv8.h
@@ -0,0 +1,44 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARMV8_ARMV8_H
+#define __ASM_ARMV8_ARMV8_H
+
+/*
+ * SCTLR_EL2 bits definitions
+ */
+#define CR_M		(1 << 0)	/* MMU enable				*/
+#define CR_A		(1 << 1)	/* Alignment abort enable		*/
+#define CR_C		(1 << 2)	/* Dcache enable			*/
+#define CR_SA		(1 << 3)	/* Stack Alignment Check Enable		*/
+#define CR_I		(1 << 12)	/* Icache enable			*/
+#define CR_WXN		(1 << 19)	/* Write Permision Imply XN		*/
+#define CR_EE		(1 << 25)	/* Exception (Big) Endian		*/
+
+void v8_outer_cache_enable(void);
+void v8_outer_cache_disable(void);
+void v8_outer_cache_flush_all(void);
+void v8_outer_cache_inval_all(void);
+void v8_outer_cache_flush_range(u64 start, u64 end);
+void v8_outer_cache_inval_range(u64 start, u64 end);
+
+#endif	 /* __ASM_ARMV8_ARMV8_H */
diff --git a/arch/arm/include/asm/arch-armv8/gpio.h b/arch/arm/include/asm/arch-armv8/gpio.h
new file mode 100644
index 0000000..0fbbcaf
--- /dev/null
+++ b/arch/arm/include/asm/arch-armv8/gpio.h
@@ -0,0 +1,26 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARMV8_GPIO_H_
+#define _ASM_ARMV8_GPIO_H_
+
+#endif	/* _ASM_ARMV8_GPIO_H_ */
diff --git a/arch/arm/include/asm/arch-armv8/mmu.h b/arch/arm/include/asm/arch-armv8/mmu.h
new file mode 100644
index 0000000..3647f0a
--- /dev/null
+++ b/arch/arm/include/asm/arch-armv8/mmu.h
@@ -0,0 +1,117 @@ 
+/*
+ * Copyright (c) 2013	David Feng <fenghua@phytium.com.cn>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARMV8_MMU_H_
+#define _ASM_ARMV8_MMU_H_
+
+#ifdef __ASSEMBLY__
+#define _AC(X,Y)	X
+#define _AT(T,X)	X
+#else
+#define __AC(X,Y)	(X##Y)
+#define _AC(X,Y)	__AC(X,Y)
+#define _AT(T,X)	((T)(X))
+#endif
+
+#define UL(x)		_AC(x, UL)
+
+/***************************************************************/
+/*
+ * The following definitions are related each other, shoud be
+ * calculated specifically.
+ */
+#define VA_BITS			(39)
+
+/* PAGE_SHIFT determines the page size */
+#undef  PAGE_SIZE
+#define PAGE_SHIFT		16
+#define PAGE_SIZE		(1 << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+/*
+ * section address mask and size definitions.
+ */
+#define SECTION_SHIFT		29
+#define SECTION_SIZE		(_AC(1, UL) << SECTION_SHIFT)
+#define SECTION_MASK		(~(SECTION_SIZE-1))
+/***************************************************************/
+
+/*
+ * Memory types available.
+ */
+#define MT_DEVICE_nGnRnE	0
+#define MT_DEVICE_nGnRE		1
+#define MT_DEVICE_GRE		2
+#define MT_NORMAL_NC		3
+#define MT_NORMAL		4
+
+#define MAIR(attr, mt)		((attr) << ((mt) * 8))
+
+/*
+ * Hardware page table definitions.
+ *
+ * Level 2 descriptor (PMD).
+ */
+#define PMD_TYPE_MASK		(3 << 0)
+#define PMD_TYPE_FAULT		(0 << 0)
+#define PMD_TYPE_TABLE		(3 << 0)
+#define PMD_TYPE_SECT		(1 << 0)
+
+/*
+ * Section
+ */
+#define PMD_SECT_S		(UL(3) << 8)
+#define PMD_SECT_AF		(UL(1) << 10)
+#define PMD_SECT_NG		(UL(1) << 11)
+#define PMD_SECT_PXN		(UL(1) << 53)
+#define PMD_SECT_UXN		(UL(1) << 54)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PMD_ATTRINDX(t)		((t) << 2)
+#define PMD_ATTRINDX_MASK	(7 << 2)
+
+/*
+ * TCR_EL2 flags.
+ */
+#define TCR_T0SZ(x)		((64 - (x)) << 0)
+#define TCR_IRGN_NC		(0 << 8)
+#define TCR_IRGN_WBWA		(1 << 8)
+#define TCR_IRGN_WT		(2 << 8)
+#define TCR_IRGN_WBnWA		(3 << 8)
+#define TCR_IRGN_MASK		(3 << 8)
+#define TCR_ORGN_NC		(0 << 10)
+#define TCR_ORGN_WBWA		(1 << 10)
+#define TCR_ORGN_WT		(2 << 10)
+#define TCR_ORGN_WBnWA		(3 << 10)
+#define TCR_ORGN_MASK		(3 << 10)
+#define TCR_SHARED		(3 << 12)
+#define TCR_TG0_4K		(0 << 14)
+#define TCR_TG0_64K		(1 << 14)
+#define TCR_TG0_16K		(2 << 14)
+#define TCR_IPS_40BIT		(2 << 16)
+
+/* PTWs cacheable, inner/outer WBWA not shareable */
+#define TCR_FLAGS		TCR_IRGN_WBWA | TCR_ORGN_WBWA
+
+#endif /* _ASM_ARMV8_MMU_H_ */
diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
index c3489f1..a02d989 100644
--- a/arch/arm/include/asm/byteorder.h
+++ b/arch/arm/include/asm/byteorder.h
@@ -23,10 +23,22 @@ 
 #  define __SWAB_64_THRU_32__
 #endif
 
+#ifndef CONFIG_ARMV8
+
 #ifdef __ARMEB__
 #include <linux/byteorder/big_endian.h>
 #else
 #include <linux/byteorder/little_endian.h>
 #endif
 
+#else	/* CONFIG_ARMV8 */
+
+#ifdef __AARCH64EB__
+#include <linux/byteorder/big_endian.h>
+#else
+#include <linux/byteorder/little_endian.h>
+#endif
+
+#endif	/* CONFIG_ARMV8 */
+
 #endif
diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
index 99b703e..30f008e 100644
--- a/arch/arm/include/asm/config.h
+++ b/arch/arm/include/asm/config.h
@@ -9,4 +9,14 @@ 
 
 #define CONFIG_LMB
 #define CONFIG_SYS_BOOT_RAMDISK_HIGH
+
+#ifdef CONFIG_ARMV8
+/*
+ * Currently, GOT is used to relocate u-boot and
+ * configuration CONFIG_NEEDS_MANUAL_RELOC is needed.
+ */
+#define CONFIG_NEEDS_MANUAL_RELOC
+#define CONFIG_PHYS_64BIT
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
index 79a9597..27b82b9 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -47,6 +47,10 @@  struct arch_global_data {
 
 #include <asm-generic/global_data.h>
 
-#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r8")
+#ifndef CONFIG_ARMV8
+#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("r8")
+#else
+#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("x18")
+#endif
 
 #endif /* __ASM_GBL_DATA_H */
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1fbc531..dfb85e8 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -75,42 +75,42 @@  static inline phys_addr_t virt_to_phys(void * vaddr)
 #define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
 #define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
 
-extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen)
+extern inline void __raw_writesb(unsigned long addr, const void *data, int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
 		__arch_putb(*buf++, addr);
 }
 
-extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen)
+extern inline void __raw_writesw(unsigned long addr, const void *data, int wordlen)
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
 		__arch_putw(*buf++, addr);
 }
 
-extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen)
+extern inline void __raw_writesl(unsigned long addr, const void *data, int longlen)
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
 		__arch_putl(*buf++, addr);
 }
 
-extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
+extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
 		*buf++ = __arch_getb(addr);
 }
 
-extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
+extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
 		*buf++ = __arch_getw(addr);
 }
 
-extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
+extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index ff13f36..70be196 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -54,5 +54,31 @@ 
 	bcs	1b
 .endm
 
+#ifdef CONFIG_ARMV8
+/*
+ * Register aliases.
+ */
+lr	.req	x30		// link register
+
+/*
+ * Stack pushing/popping (register pairs only).
+ * Equivalent to store decrement before,
+ * load increment after.
+ */
+.macro	push, xreg1, xreg2
+	stp	\xreg1, \xreg2, [sp, #-16]!
+.endm
+
+.macro	pop, xreg1, xreg2
+	ldp	\xreg1, \xreg2, [sp], #16
+.endm
+
+.macro	ventry	label
+	.align	7
+	b	\label
+.endm
+
+#endif	/* CONFIG_ARMV8 */
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_ARM_MACRO_H__ */
diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
index c412486..0da38cb 100644
--- a/arch/arm/include/asm/posix_types.h
+++ b/arch/arm/include/asm/posix_types.h
@@ -19,6 +19,8 @@ 
  * assume GCC is being used.
  */
 
+#ifndef CONFIG_ARMV8
+
 typedef unsigned short		__kernel_dev_t;
 typedef unsigned long		__kernel_ino_t;
 typedef unsigned short		__kernel_mode_t;
@@ -44,6 +46,35 @@  typedef unsigned int		__kernel_gid32_t;
 typedef unsigned short		__kernel_old_uid_t;
 typedef unsigned short		__kernel_old_gid_t;
 
+#else	/* CONFIG_ARMV8 */
+
+typedef unsigned short		__kernel_dev_t;
+typedef unsigned long		__kernel_ino_t;
+typedef unsigned short		__kernel_mode_t;
+typedef unsigned short		__kernel_nlink_t;
+typedef long			__kernel_off_t;
+typedef int			__kernel_pid_t;
+typedef unsigned short		__kernel_ipc_pid_t;
+typedef unsigned short		__kernel_uid_t;
+typedef unsigned short		__kernel_gid_t;
+typedef unsigned long		__kernel_size_t;
+typedef long			__kernel_ssize_t;
+typedef long			__kernel_ptrdiff_t;
+typedef long			__kernel_time_t;
+typedef long			__kernel_suseconds_t;
+typedef long			__kernel_clock_t;
+typedef long			__kernel_daddr_t;
+typedef char *			__kernel_caddr_t;
+typedef unsigned short		__kernel_uid16_t;
+typedef unsigned short		__kernel_gid16_t;
+typedef unsigned int		__kernel_uid32_t;
+typedef unsigned int		__kernel_gid32_t;
+
+typedef __kernel_uid_t		__kernel_old_uid_t;
+typedef __kernel_gid_t		__kernel_old_gid_t;
+
+#endif	/* CONFIG_ARMV8 */
+
 #ifdef __GNUC__
 typedef long long		__kernel_loff_t;
 #endif
diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h
index 79cc644..69822a2 100644
--- a/arch/arm/include/asm/proc-armv/ptrace.h
+++ b/arch/arm/include/asm/proc-armv/ptrace.h
@@ -12,6 +12,8 @@ 
 
 #include <linux/config.h>
 
+#ifndef CONFIG_ARMV8
+
 #define USR26_MODE	0x00
 #define FIQ26_MODE	0x01
 #define IRQ26_MODE	0x02
@@ -106,4 +108,40 @@  static inline int valid_user_regs(struct pt_regs *regs)
 
 #endif	/* __ASSEMBLY__ */
 
+#else	/* CONFIG_ARMV8 */
+
+#define PCMASK		0
+
+#define S_X0		(0)	/* offsetof(struct pt_regs, regs[0]) */
+#define S_X1		(8)	/* offsetof(struct pt_regs, regs[1]) */
+#define S_X2		(16)	/* offsetof(struct pt_regs, regs[2]) */
+#define S_X3		(24	/* offsetof(struct pt_regs, regs[3]) */
+#define S_X4		(32)	/* offsetof(struct pt_regs, regs[4]) */
+#define S_X5		(40)	/* offsetof(struct pt_regs, regs[5]) */
+#define S_X6		(48)	/* offsetof(struct pt_regs, regs[6]) */
+#define S_X7		(56)	/* offsetof(struct pt_regs, regs[7]) */
+#define S_LR		(240)	/* offsetof(struct pt_regs, regs[30]) */
+#define S_SP		(248)	/* offsetof(struct pt_regs, sp) */
+#define S_PC		(256)	/* offsetof(struct pt_regs, pc) */
+#define S_PSTATE	(264)	/* offsetof(struct pt_regs, pstate) */
+#define S_FRAME_SIZE	(272)	/* sizeof(struct pt_regs) */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This struct defines the way the registers are stored on the stack during an
+ * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
+ * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
+ */
+struct pt_regs {
+	u64 regs[31];
+	u64 sp;
+	u64 pc;
+	u64 pstate;
+};
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* CONFIG_ARMV8 */
+
 #endif
diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h
index b4cfa68..fd98b7b 100644
--- a/arch/arm/include/asm/proc-armv/system.h
+++ b/arch/arm/include/asm/proc-armv/system.h
@@ -15,6 +15,8 @@ 
 /*
  * Save the current interrupt enable state & disable IRQs
  */
+#ifndef CONFIG_ARMV8
+
 #define local_irq_save(x)					\
 	({							\
 		unsigned long temp;				\
@@ -109,7 +111,61 @@ 
 	: "r" (x)						\
 	: "memory")
 
-#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
+#else	/* CONFIG_ARMV8 */
+
+/*
+ * Save the current interrupt enable state
+ * and disable IRQs/FIQs
+ */
+#define local_irq_save(flags)			\
+	({					\
+		asm volatile(			\
+			"mrs	%0, daif"	\
+			"msr	daifset, #3"	\
+			: "=r" (flags)		\
+			:			\
+			: "memory");		\
+	})
+
+/*
+ * restore saved IRQ & FIQ state
+ */
+#define local_irq_restore(flags)		\
+	({					\
+		asm volatile(			\
+			"msr	daif, %0"	\
+			:			\
+			: "r" (flags)		\
+			: "memory");		\
+	})
+
+/*
+ * Enable IRQs/FIQs
+ */
+#define local_irq_enable()			\
+	({					\
+		asm volatile(			\
+			"msr	daifclr, #3"	\
+			:			\
+			:			\
+			: "memory");		\
+	})
+
+/*
+ * Disable IRQs/FIQs
+ */
+#define local_irq_disable()			\
+	({					\
+		asm volatile(			\
+			"msr	daifset, #3"	\
+			:			\
+			:			\
+			: "memory");		\
+	})
+
+#endif	/* CONFIG_ARMV8 */
+
+#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || defined(CONFIG_ARMV8)
 /*
  * On the StrongARM, "swp" is terminally broken since it bypasses the
  * cache totally.  This means that the cache becomes inconsistent, and,
diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
index 71dc049..128dd3e 100644
--- a/arch/arm/include/asm/types.h
+++ b/arch/arm/include/asm/types.h
@@ -18,8 +18,13 @@  typedef __signed__ int __s32;
 typedef unsigned int __u32;
 
 #if defined(__GNUC__)
+#ifndef	CONFIG_ARMV8
 __extension__ typedef __signed__ long long __s64;
 __extension__ typedef unsigned long long __u64;
+#else	/* CONFIG_ARMV8 */
+__extension__ typedef __signed__ long __s64;
+__extension__ typedef unsigned long __u64;
+#endif	/* CONFIG_ARMV8 */
 #endif
 
 /*
@@ -36,10 +41,19 @@  typedef unsigned short u16;
 typedef signed int s32;
 typedef unsigned int u32;
 
+#ifndef	CONFIG_ARMV8
 typedef signed long long s64;
 typedef unsigned long long u64;
+#else	/* CONFIG_ARMV8 */
+typedef signed long s64;
+typedef unsigned long u64;
+#endif	/* CONFIG_ARMV8 */
 
+#ifndef	CONFIG_ARMV8
 #define BITS_PER_LONG 32
+#else	/* CONFIG_ARMV8 */
+#define BITS_PER_LONG 64
+#endif	/* CONFIG_ARMV8 */
 
 /* Dma addresses are 32-bits wide.  */
 
diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
index 2b5fce8..3ef5538 100644
--- a/arch/arm/include/asm/u-boot.h
+++ b/arch/arm/include/asm/u-boot.h
@@ -44,6 +44,10 @@  typedef struct bd_info {
 #endif /* !CONFIG_SYS_GENERIC_BOARD */
 
 /* For image.h:image_check_target_arch() */
+#ifndef CONFIG_ARMV8
 #define IH_ARCH_DEFAULT IH_ARCH_ARM
+#else
+#define IH_ARCH_DEFAULT IH_ARCH_ARM64
+#endif
 
 #endif	/* _U_BOOT_H_ */
diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
index 44593a8..a051e97 100644
--- a/arch/arm/include/asm/unaligned.h
+++ b/arch/arm/include/asm/unaligned.h
@@ -8,6 +8,8 @@ 
 /*
  * Select endianness
  */
+#ifndef CONFIG_ARMV8
+
 #ifndef __ARMEB__
 #define get_unaligned	__get_unaligned_le
 #define put_unaligned	__put_unaligned_le
@@ -16,4 +18,16 @@ 
 #define put_unaligned	__put_unaligned_be
 #endif
 
+#else	/* CONFIG_ARMV8 */
+
+#ifndef __AARCH64EB__
+#define get_unaligned	__get_unaligned_le
+#define put_unaligned	__put_unaligned_le
+#else
+#define get_unaligned	__get_unaligned_be
+#define put_unaligned	__put_unaligned_be
+#endif
+
+#endif	/* CONFIG_ARMV8 */
+
 #endif /* _ASM_ARM_UNALIGNED_H */
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 4e78723..86b19e2 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -10,6 +10,7 @@  include $(TOPDIR)/config.mk
 LIB	= $(obj)lib$(ARCH).o
 LIBGCC	= $(obj)libgcc.o
 
+ifndef CONFIG_ARMV8
 GLSOBJS	+= _ashldi3.o
 GLSOBJS	+= _ashrdi3.o
 GLSOBJS	+= _divsi3.o
@@ -21,9 +22,12 @@  GLSOBJS	+= _umodsi3.o
 GLCOBJS	+= div0.o
 
 SOBJS-y += crt0.o
+endif
 
 ifndef CONFIG_SPL_BUILD
+ifndef CONFIG_ARMV8
 SOBJS-y += relocate.o
+endif
 ifndef CONFIG_SYS_GENERIC_BOARD
 COBJS-y	+= board.o
 endif
@@ -38,11 +42,15 @@  else
 COBJS-$(CONFIG_SPL_FRAMEWORK) += spl.o
 endif
 
+ifndef CONFIG_ARMV8
 COBJS-y	+= interrupts.o
+endif
 COBJS-y	+= reset.o
 
 COBJS-y	+= cache.o
+ifndef CONFIG_ARMV8
 COBJS-y	+= cache-cp15.o
+endif
 
 SRCS	:= $(GLSOBJS:.o=.S) $(GLCOBJS:.o=.c) \
 	   $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
index 34f50b0..2a92d69 100644
--- a/arch/arm/lib/board.c
+++ b/arch/arm/lib/board.c
@@ -344,7 +344,11 @@  void board_init_f(ulong bootflag)
 
 #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
 	/* reserve TLB table */
+#ifndef CONFIG_ARMV8
 	gd->arch.tlb_size = 4096 * 4;
+#else
+	gd->arch.tlb_size = 0x10000;
+#endif
 	addr -= gd->arch.tlb_size;
 
 	/* round down to next 64 kB limit */
@@ -419,6 +423,7 @@  void board_init_f(ulong bootflag)
 	}
 #endif
 
+#ifndef CONFIG_ARMV8
 	/* setup stackpointer for exeptions */
 	gd->irq_sp = addr_sp;
 #ifdef CONFIG_USE_IRQ
@@ -431,6 +436,10 @@  void board_init_f(ulong bootflag)
 
 	/* 8-byte alignment for ABI compliance */
 	addr_sp &= ~0x07;
+#else	/* CONFIG_ARMV8 */
+	/* 16-byte alignment for ABI compliance */
+	addr_sp &= ~0x0f;
+#endif	/* CONFIG_ARMV8 */
 #else
 	addr_sp += 128;	/* leave 32 words for abort-stack   */
 	gd->irq_sp = addr_sp;
@@ -523,6 +532,15 @@  void board_init_r(gd_t *id, ulong dest_addr)
 
 	debug("monitor flash len: %08lX\n", monitor_flash_len);
 	board_init();	/* Setup chipselects */
+
+#ifdef CONFIG_NEEDS_MANUAL_RELOC
+	/*
+	 * We have to relocate the command table manually
+	 */
+	fixup_cmdtable(ll_entry_start(cmd_tbl_t, cmd),
+			ll_entry_count(cmd_tbl_t, cmd));
+#endif /* CONFIG_NEEDS_MANUAL_RELOC */
+
 	/*
 	 * TODO: printing of the clock inforamtion of the board is now
 	 * implemented as part of bdinfo command. Currently only support for
diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
index eefb456..d9a86b9 100644
--- a/arch/arm/lib/bootm.c
+++ b/arch/arm/lib/bootm.c
@@ -222,6 +222,7 @@  static void boot_prep_linux(bootm_headers_t *images)
 /* Subcommand: GO */
 static void boot_jump_linux(bootm_headers_t *images, int flag)
 {
+#ifndef CONFIG_ARMV8
 	unsigned long machid = gd->bd->bi_arch_number;
 	char *s;
 	void (*kernel_entry)(int zero, int arch, uint params);
@@ -248,6 +249,21 @@  static void boot_jump_linux(bootm_headers_t *images, int flag)
 
 	if (!fake)
 		kernel_entry(0, machid, r2);
+#else
+	void (*kernel_entry)(void *fdt_addr);
+	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+	kernel_entry = (void (*)(void *fdt_addr))images->ep;
+
+	debug("## Transferring control to Linux (at address %lx)" \
+		"...\n", (ulong)kernel_entry);
+	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+	announce_and_cleanup(fake);
+
+	if (!fake)
+		kernel_entry(images->ft_addr);
+#endif
 }
 
 /* Main Entry point for arm bootm implementation
diff --git a/common/image.c b/common/image.c
index 56a5a62..7182549 100644
--- a/common/image.c
+++ b/common/image.c
@@ -81,6 +81,7 @@  static const table_entry_t uimage_arch[] = {
 	{	IH_ARCH_NDS32,		"nds32",	"NDS32",	},
 	{	IH_ARCH_OPENRISC,	"or1k",		"OpenRISC 1000",},
 	{	IH_ARCH_SANDBOX,	"sandbox",	"Sandbox",	},
+	{	IH_ARCH_ARM64,		"arm64",	"AArch64",	},
 	{	-1,			"",		"",		},
 };
 
diff --git a/doc/README.armv8 b/doc/README.armv8
new file mode 100644
index 0000000..737c875
--- /dev/null
+++ b/doc/README.armv8
@@ -0,0 +1,14 @@ 
+Notes:
+
+1. Currenly, u-boot running at EL2.
+
+2. GOT is used to relocate u-boot and CONFIG_NEEDS_MANUAL_RELOC is needed.
+
+3. Fdt should be placed in the first 512 megabytes from the start of the kernel image.
+   So, fdt_high should be defined specially. Please reference linux/Documentation/arm64/booting.txt.
+
+4. Generic board is supported.
+
+TODO:
+
+1. So much print info are formated with %08lx, maybe it should be modified to %016lx.
diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
index 8fb1765..10676f1 100644
--- a/examples/standalone/stubs.c
+++ b/examples/standalone/stubs.c
@@ -39,6 +39,7 @@  gd_t *global_data;
 "	bctr\n"				\
 	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
 #elif defined(CONFIG_ARM)
+#ifndef CONFIG_ARMV8
 /*
  * r8 holds the pointer to the global_data, ip is a call-clobbered
  * register
@@ -50,6 +51,20 @@  gd_t *global_data;
 "	ldr	ip, [r8, %0]\n"		\
 "	ldr	pc, [ip, %1]\n"		\
 	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
+#else
+/*
+ * x18 holds the pointer to the global_data, x9 is a call-clobbered
+ * register
+ */
+#define EXPORT_FUNC(x) \
+	asm volatile (			\
+"	.globl " #x "\n"		\
+#x ":\n"				\
+"	ldr	x9, [x18, %0]\n"		\
+"	ldr	x9, [x9, %1]\n"		\
+"	br	x9\n"		\
+	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x19");
+#endif
 #elif defined(CONFIG_MIPS)
 /*
  * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
diff --git a/include/image.h b/include/image.h
index f93a393..12262d7 100644
--- a/include/image.h
+++ b/include/image.h
@@ -156,6 +156,7 @@  struct lmb;
 #define IH_ARCH_SANDBOX		19	/* Sandbox architecture (test only) */
 #define IH_ARCH_NDS32	        20	/* ANDES Technology - NDS32  */
 #define IH_ARCH_OPENRISC        21	/* OpenRISC 1000  */
+#define IH_ARCH_ARM64		22	/* ARM64	*/
 
 /*
  * Image Types