Patchwork [v2,02/12] ARM: Prepare translation for AArch64 code

login
register
mail settings
Submitter John Rigby
Date April 30, 2013, 6:36 a.m.
Message ID <1367303797-9561-1-git-send-email-john.rigby@linaro.org>
Download mbox | patch
Permalink /patch/240559/
State New
Headers show

Comments

John Rigby - April 30, 2013, 6:36 a.m.
From: Alexander Graf <agraf@suse.de>

This patch adds all the prerequisites for AArch64 support that didn't
fit into split up patches. It extends important bits in the core cpu
headers to also take AArch64 mode into account.

Add new ARM_TBFLAG_AARCH64_STATE flag that indicates an ARMv8 cpu
running in aarch64 mode vs aarch32 mode.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: John Rigby <john.rigby@linaro.org>
---
Changes in v2:
- remove many uses of is_a64 as that is a indicator the target arch is
aarch64 not what mode it is running in.
- add ARM_TBFLAG_AARCH64_STATE and aarch64_state 

 include/elf.h          |    2 +
 target-arm/cpu.h       |  138 ++++++++++++++++++++++++++++++++++--------------
 target-arm/translate.c |   14 +++--
 3 files changed, 108 insertions(+), 46 deletions(-)
Alexander Graf - April 30, 2013, 9:36 a.m.
On 30.04.2013, at 08:36, John Rigby wrote:

> From: Alexander Graf <agraf@suse.de>
> 
> This patch adds all the prerequisites for AArch64 support that didn't
> fit into split up patches. It extends important bits in the core cpu
> headers to also take AArch64 mode into account.
> 
> Add new ARM_TBFLAG_AARCH64_STATE flag that indicates an ARMv8 cpu
> running in aarch64 mode vs aarch32 mode.
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> Signed-off-by: John Rigby <john.rigby@linaro.org>
> ---
> Changes in v2:
> - remove many uses of is_a64 as that is a indicator the target arch is
> aarch64 not what mode it is running in.

It indicates what mode the CPU is running on. The only reason it was defined statically so far, was that linux-user was the only target I implemented. And that one can't switch during runtime.

Alex

> - add ARM_TBFLAG_AARCH64_STATE and aarch64_state 
> 
> include/elf.h          |    2 +
> target-arm/cpu.h       |  138 ++++++++++++++++++++++++++++++++++--------------
> target-arm/translate.c |   14 +++--
> 3 files changed, 108 insertions(+), 46 deletions(-)
> 
> diff --git a/include/elf.h b/include/elf.h
> index a21ea53..0ff0ea6 100644
> --- a/include/elf.h
> +++ b/include/elf.h
> @@ -109,6 +109,8 @@ typedef int64_t  Elf64_Sxword;
> #define EM_OPENRISC     92        /* OpenCores OpenRISC */
> 
> #define EM_UNICORE32    110     /* UniCore32 */
> +#define EM_AARCH64      183     /* ARM 64-bit architecture */
> +
> 
> /*
>  * This is an interim value that we will use until the committee comes
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index 5438444..3a79dfa 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -19,13 +19,19 @@
> #ifndef CPU_ARM_H
> #define CPU_ARM_H
> 
> -#define TARGET_LONG_BITS 32
> +#include "config.h"
> 
> -#define ELF_MACHINE	EM_ARM
> +#if defined (TARGET_AARCH64)
> +  /* AArch64 definitions */
> +#  define TARGET_LONG_BITS 64
> +#  define ELF_MACHINE	EM_AARCH64
> +#else
> +#  define TARGET_LONG_BITS 32
> +#  define ELF_MACHINE	EM_ARM
> +#endif
> 
> #define CPUArchState struct CPUARMState
> 
> -#include "config.h"
> #include "qemu-common.h"
> #include "exec/cpu-defs.h"
> 
> @@ -79,6 +85,14 @@ struct arm_boot_info;
> typedef struct CPUARMState {
>     /* Regs for current mode.  */
>     uint32_t regs[16];
> +
> +    /* Regs for A64 mode.  */
> +    uint64_t xregs[31];
> +    uint64_t pc;
> +    uint64_t sp;
> +    uint32_t pstate;
> +    uint32_t aarch64_state; /* 1 if CPU is in aarch64 state */
> +
>     /* Frequently accessed CPSR bits are stored separately for efficiency.
>        This contains all the other bits.  Use cpsr_{read,write} to access
>        the whole CPSR.  */
> @@ -154,6 +168,11 @@ typedef struct CPUARMState {
>         uint32_t c15_power_control; /* power control */
>     } cp15;
> 
> +    /* System registers (AArch64) */
> +    struct {
> +        uint64_t tpidr_el0;
> +    } sr;
> +
>     struct {
>         uint32_t other_sp;
>         uint32_t vecbase;
> @@ -170,7 +189,7 @@ typedef struct CPUARMState {
> 
>     /* VFP coprocessor state.  */
>     struct {
> -        float64 regs[32];
> +        float64 regs[64];
> 
>         uint32_t xregs[16];
>         /* We store these fpcsr fields separately for convenience.  */
> @@ -240,6 +259,24 @@ int bank_number(int mode);
> void switch_mode(CPUARMState *, int);
> uint32_t do_arm_semihosting(CPUARMState *env);
> 
> +static inline bool is_a64(CPUARMState *env)
> +{
> +#ifdef TARGET_AARCH64
> +    return true;
> +#else
> +    return false;
> +#endif
> +}
> +
> +#define PSTATE_N_SHIFT 3
> +#define PSTATE_N  (1 << PSTATE_N_SHIFT)
> +#define PSTATE_Z_SHIFT 2
> +#define PSTATE_Z  (1 << PSTATE_Z_SHIFT)
> +#define PSTATE_C_SHIFT 1
> +#define PSTATE_C  (1 << PSTATE_C_SHIFT)
> +#define PSTATE_V_SHIFT 0
> +#define PSTATE_V  (1 << PSTATE_V_SHIFT)
> +
> /* you can call this signal handler from your SIGBUS and SIGSEGV
>    signal handlers to inform the virtual CPU of exceptions. non zero
>    is returned if the signal was handled by the virtual CPU.  */
> @@ -623,8 +660,13 @@ static inline bool cp_access_ok(CPUARMState *env,
> #define TARGET_PAGE_BITS 10
> #endif
> 
> -#define TARGET_PHYS_ADDR_SPACE_BITS 40
> -#define TARGET_VIRT_ADDR_SPACE_BITS 32
> +#if defined (TARGET_AARCH64)
> +#  define TARGET_PHYS_ADDR_SPACE_BITS 64
> +#  define TARGET_VIRT_ADDR_SPACE_BITS 64
> +#else
> +#  define TARGET_PHYS_ADDR_SPACE_BITS 40
> +#  define TARGET_VIRT_ADDR_SPACE_BITS 32
> +#endif
> 
> static inline CPUARMState *cpu_init(const char *cpu_model)
> {
> @@ -661,21 +703,23 @@ static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
> #include "exec/cpu-all.h"
> 
> /* Bit usage in the TB flags field: */
> -#define ARM_TBFLAG_THUMB_SHIFT      0
> -#define ARM_TBFLAG_THUMB_MASK       (1 << ARM_TBFLAG_THUMB_SHIFT)
> -#define ARM_TBFLAG_VECLEN_SHIFT     1
> -#define ARM_TBFLAG_VECLEN_MASK      (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
> -#define ARM_TBFLAG_VECSTRIDE_SHIFT  4
> -#define ARM_TBFLAG_VECSTRIDE_MASK   (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
> -#define ARM_TBFLAG_PRIV_SHIFT       6
> -#define ARM_TBFLAG_PRIV_MASK        (1 << ARM_TBFLAG_PRIV_SHIFT)
> -#define ARM_TBFLAG_VFPEN_SHIFT      7
> -#define ARM_TBFLAG_VFPEN_MASK       (1 << ARM_TBFLAG_VFPEN_SHIFT)
> -#define ARM_TBFLAG_CONDEXEC_SHIFT   8
> -#define ARM_TBFLAG_CONDEXEC_MASK    (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
> -#define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
> -#define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
> -/* Bits 31..17 are currently unused. */
> +#define ARM_TBFLAG_THUMB_SHIFT         0
> +#define ARM_TBFLAG_THUMB_MASK          (1 << ARM_TBFLAG_THUMB_SHIFT)
> +#define ARM_TBFLAG_VECLEN_SHIFT        1
> +#define ARM_TBFLAG_VECLEN_MASK         (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
> +#define ARM_TBFLAG_VECSTRIDE_SHIFT     4
> +#define ARM_TBFLAG_VECSTRIDE_MASK      (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
> +#define ARM_TBFLAG_PRIV_SHIFT          6
> +#define ARM_TBFLAG_PRIV_MASK           (1 << ARM_TBFLAG_PRIV_SHIFT)
> +#define ARM_TBFLAG_VFPEN_SHIFT         7
> +#define ARM_TBFLAG_VFPEN_MASK          (1 << ARM_TBFLAG_VFPEN_SHIFT)
> +#define ARM_TBFLAG_CONDEXEC_SHIFT      8
> +#define ARM_TBFLAG_CONDEXEC_MASK       (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
> +#define ARM_TBFLAG_BSWAP_CODE_SHIFT    16
> +#define ARM_TBFLAG_BSWAP_CODE_MASK     (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
> +#define ARM_TBFLAG_AARCH64_STATE_SHIFT 17
> +#define ARM_TBFLAG_AARCH64_STATE_MASK  (1 << ARM_TBFLAG_AARCH64_STATE_SHIFT)
> +/* Bits 31..18 are currently unused. */
> 
> /* some convenience accessor macros */
> #define ARM_TBFLAG_THUMB(F) \
> @@ -692,29 +736,37 @@ static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
>     (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
> #define ARM_TBFLAG_BSWAP_CODE(F) \
>     (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
> +#define ARM_TBFLAG_AARCH64_STATE(F) \
> +    (((F) & ARM_TBFLAG_AARCH64_STATE_MASK) >> ARM_TBFLAG_AARCH64_STATE_SHIFT)
> 
> static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
>                                         target_ulong *cs_base, int *flags)
> {
> -    int privmode;
> -    *pc = env->regs[15];
> -    *cs_base = 0;
> -    *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
> -        | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
> -        | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
> -        | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
> -        | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
> -    if (arm_feature(env, ARM_FEATURE_M)) {
> -        privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
> +    if (is_a64(env)) {
> +        *pc = env->pc;
> +        *flags = env->aarch64_state << ARM_TBFLAG_AARCH64_STATE_SHIFT;
>     } else {
> -        privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
> -    }
> -    if (privmode) {
> -        *flags |= ARM_TBFLAG_PRIV_MASK;
> -    }
> -    if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
> -        *flags |= ARM_TBFLAG_VFPEN_MASK;
> +        int privmode;
> +        *pc = env->regs[15];
> +        *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
> +            | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
> +            | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
> +            | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
> +            | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
> +        if (arm_feature(env, ARM_FEATURE_M)) {
> +            privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
> +        } else {
> +            privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
> +        }
> +        if (privmode) {
> +            *flags |= ARM_TBFLAG_PRIV_MASK;
> +        }
> +        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
> +            *flags |= ARM_TBFLAG_VFPEN_MASK;
> +        }
>     }
> +
> +    *cs_base = 0;
> }
> 
> static inline bool cpu_has_work(CPUState *cpu)
> @@ -727,11 +779,15 @@ static inline bool cpu_has_work(CPUState *cpu)
> 
> static inline void cpu_pc_from_tb(CPUARMState *env, TranslationBlock *tb)
> {
> -    env->regs[15] = tb->pc;
> +    if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
> +        env->pc = tb->pc;
> +    } else {
> +        env->regs[15] = tb->pc;
> +    }
> }
> 
> /* Load an instruction and return it in the standard little-endian order */
> -static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
> +static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
>                                     bool do_swap)
> {
>     uint32_t insn = cpu_ldl_code(env, addr);
> @@ -742,7 +798,7 @@ static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
> }
> 
> /* Ditto, for a halfword (Thumb) instruction */
> -static inline uint16_t arm_lduw_code(CPUARMState *env, uint32_t addr,
> +static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
>                                      bool do_swap)
> {
>     uint16_t insn = cpu_lduw_code(env, addr);
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 36537bd..a4b6d8a 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -9750,7 +9750,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
>     uint16_t *gen_opc_end;
>     int j, lj;
>     target_ulong pc_start;
> -    uint32_t next_page_start;
> +    target_ulong next_page_start;
>     int num_insns;
>     int max_insns;
> 
> @@ -9838,7 +9838,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
>         /* Intercept jump to the magic kernel page.  */
>         if (dc->pc >= 0xffff0000) {
>             /* We always get here via a jump, so know we are not in a
> -               conditional execution block.  */
> +                conditional execution block.  */
>             gen_exception(EXCP_KERNEL_TRAP);
>             dc->is_jmp = DISAS_UPDATE;
>             break;
> @@ -9846,7 +9846,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
> #else
>         if (dc->pc >= 0xfffffff0 && IS_M(env)) {
>             /* We always get here via a jump, so know we are not in a
> -               conditional execution block.  */
> +                conditional execution block.  */
>             gen_exception(EXCP_EXCEPTION_EXIT);
>             dc->is_jmp = DISAS_UPDATE;
>             break;
> @@ -9905,7 +9905,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
>         }
> 
>         if (tcg_check_temp_count()) {
> -            fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc);
> +            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n", dc->pc);
>         }
> 
>         /* Translation stops when a conditional branch is encountered.
> @@ -10075,6 +10075,10 @@ void cpu_dump_state(CPUARMState *env, FILE *f, fprintf_function cpu_fprintf,
> 
> void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
> {
> -    env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
> +    if (is_a64(env)) {
> +        env->pc = tcg_ctx.gen_opc_pc[pc_pos];
> +    } else {
> +        env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
> +    }
>     env->condexec_bits = gen_opc_condexec_bits[pc_pos];
> }
> -- 
> 1.7.9.5
>
Peter Maydell - April 30, 2013, 12:36 p.m.
On 30 April 2013 10:36, Alexander Graf <agraf@suse.de> wrote:
>
> On 30.04.2013, at 08:36, John Rigby wrote:
>> - remove many uses of is_a64 as that is a indicator the target arch is
>> aarch64 not what mode it is running in.
>
> It indicates what mode the CPU is running on. The only reason it
> was defined statically so far, was that linux-user was the only
> target I implemented. And that one can't switch during runtime.

Agreed. "target arch supports aarch64" is a compile time thing --
compare use of TARGET_X86_64 vs CODE64() in target-i386.

So there are actually three different kinds of check here:
 * TARGET_AARCH64 (or whatever we're using as the #define)
    -- indicates whether this CPU supports 64 bit mode
 * a function that tests env to see if we're currently
   running in 64 bit mode
 * testing the DisasContext to see whether we're currently
   translating for 64 or 32 bit [which is something to
   initialize from the tb* at the top of
   gen_intermediate_code_internal]

-- PMM
Richard Henderson - May 1, 2013, 9:32 a.m.
On 2013-04-30 07:36, John Rigby wrote:
>       uint32_t regs[16];
> +
> +    /* Regs for A64 mode.  */
> +    uint64_t xregs[31];
> +    uint64_t pc;
> +    uint64_t sp;
> +    uint32_t pstate;
> +    uint32_t aarch64_state; /* 1 if CPU is in aarch64 state */
> +

How do these registers overlap (or not) in real hardware?
Is it possible to union these with the 32-bit state?


r~
Laurent Desnogues - May 1, 2013, 10:19 a.m.
On Wednesday, May 1, 2013, Richard Henderson <rth@twiddle.net> wrote:
> On 2013-04-30 07:36, John Rigby wrote:
>>
>>       uint32_t regs[16];
>> +
>> +    /* Regs for A64 mode.  */
>> +    uint64_t xregs[31];
>> +    uint64_t pc;
>> +    uint64_t sp;
>> +    uint32_t pstate;
>> +    uint32_t aarch64_state; /* 1 if CPU is in aarch64 state */
>> +
>
> How do these registers overlap (or not) in real hardware?
> Is it possible to union these with the 32-bit state?

There is an overlap between 32- and 64-bit state registers,
but it's against the set of 32 (?) 32-bit registers that exist
across all modes, not against the 16 registers as used here.

IMHO it doesn't make sense to union them, the mapping
can be done when switching from 32- to 64-bit modes.


Laurent
Peter Maydell - May 1, 2013, 10:33 a.m.
On 1 May 2013 11:19, Laurent Desnogues <laurent.desnogues@gmail.com> wrote:
> On Wednesday, May 1, 2013, Richard Henderson <rth@twiddle.net> wrote:
>> On 2013-04-30 07:36, John Rigby wrote:
>>>
>>>       uint32_t regs[16];
>>> +
>>> +    /* Regs for A64 mode.  */
>>> +    uint64_t xregs[31];
>>> +    uint64_t pc;
>>> +    uint64_t sp;
>>> +    uint32_t pstate;
>>> +    uint32_t aarch64_state; /* 1 if CPU is in aarch64 state */
>>> +
>>
>> How do these registers overlap (or not) in real hardware?
>> Is it possible to union these with the 32-bit state?
>
> There is an overlap between 32- and 64-bit state registers,
> but it's against the set of 32 (?) 32-bit registers that exist
> across all modes, not against the 16 registers as used here.
>
> IMHO it doesn't make sense to union them, the mapping
> can be done when switching from 32- to 64-bit modes.

Agreed -- the 32/64 switch only ever happens when taking
or returning from an exception, so it is easier to get
the overlap semantics right with explicit code there
rather than try to change all the existing 32 bit code
to implicitly get things right. Plus as you say the
interaction with aarch32 register banking would make
unioning tricky.

-- PMM
John Rigby - May 2, 2013, 9:22 p.m.
On Wed, May 1, 2013 at 4:33 AM, Peter Maydell <peter.maydell@linaro.org>wrote:

> On 1 May 2013 11:19, Laurent Desnogues <laurent.desnogues@gmail.com>
> wrote:
> > On Wednesday, May 1, 2013, Richard Henderson <rth@twiddle.net> wrote:
> >> On 2013-04-30 07:36, John Rigby wrote:
> >>>
> >>>       uint32_t regs[16];
> >>> +
> >>> +    /* Regs for A64 mode.  */
> >>> +    uint64_t xregs[31];
> >>> +    uint64_t pc;
> >>> +    uint64_t sp;
> >>> +    uint32_t pstate;
> >>> +    uint32_t aarch64_state; /* 1 if CPU is in aarch64 state */
> >>> +
> >>
> >> How do these registers overlap (or not) in real hardware?
> >> Is it possible to union these with the 32-bit state?
> >
> > There is an overlap between 32- and 64-bit state registers,
> > but it's against the set of 32 (?) 32-bit registers that exist
> > across all modes, not against the 16 registers as used here.
> >
> > IMHO it doesn't make sense to union them, the mapping
> > can be done when switching from 32- to 64-bit modes.
>
> Agreed -- the 32/64 switch only ever happens when taking
> or returning from an exception, so it is easier to get
> the overlap semantics right with explicit code there
> rather than try to change all the existing 32 bit code
> to implicitly get things right. Plus as you say the
> interaction with aarch32 register banking would make
> unioning tricky.
>
> I think the other thing we need here is a comment stating exactly what

peter has said explaining why the two sets of registers exist and that the
aarch64 field indicates which set is active.

--john
John Rigby - May 2, 2013, 9:25 p.m.
On Tue, Apr 30, 2013 at 6:36 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On 30 April 2013 10:36, Alexander Graf <agraf@suse.de> wrote:
> >
> > On 30.04.2013, at 08:36, John Rigby wrote:
> >> - remove many uses of is_a64 as that is a indicator the target arch is
> >> aarch64 not what mode it is running in.
> >
> > It indicates what mode the CPU is running on. The only reason it
> > was defined statically so far, was that linux-user was the only
> > target I implemented. And that one can't switch during runtime.
>
> Agreed. "target arch supports aarch64" is a compile time thing --
> compare use of TARGET_X86_64 vs CODE64() in target-i386.
>
> So there are actually three different kinds of check here:
>  * TARGET_AARCH64 (or whatever we're using as the #define)
>     -- indicates whether this CPU supports 64 bit mode
Yes,  TARGET_AARCH64 is the define so we have this.
>  * a function that tests env to see if we're currently
>    running in 64 bit mode
Using thumb as an example I added  aarch64_state to serve the same
purpose for 32 vs 64.  I propose renaming this to just aarch64 and
then testing it in is_a64.
>  * testing the DisasContext to see whether we're currently
>    translating for 64 or 32 bit [which is something to
>    initialize from the tb* at the top of
>    gen_intermediate_code_internal]
This is what the new  ARM_TBFLAG_AARCH64_STATE* defines are for so
with some cleanup we have all three?

>
> -- PMM

Patch

diff --git a/include/elf.h b/include/elf.h
index a21ea53..0ff0ea6 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -109,6 +109,8 @@  typedef int64_t  Elf64_Sxword;
 #define EM_OPENRISC     92        /* OpenCores OpenRISC */
 
 #define EM_UNICORE32    110     /* UniCore32 */
+#define EM_AARCH64      183     /* ARM 64-bit architecture */
+
 
 /*
  * This is an interim value that we will use until the committee comes
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 5438444..3a79dfa 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -19,13 +19,19 @@ 
 #ifndef CPU_ARM_H
 #define CPU_ARM_H
 
-#define TARGET_LONG_BITS 32
+#include "config.h"
 
-#define ELF_MACHINE	EM_ARM
+#if defined (TARGET_AARCH64)
+  /* AArch64 definitions */
+#  define TARGET_LONG_BITS 64
+#  define ELF_MACHINE	EM_AARCH64
+#else
+#  define TARGET_LONG_BITS 32
+#  define ELF_MACHINE	EM_ARM
+#endif
 
 #define CPUArchState struct CPUARMState
 
-#include "config.h"
 #include "qemu-common.h"
 #include "exec/cpu-defs.h"
 
@@ -79,6 +85,14 @@  struct arm_boot_info;
 typedef struct CPUARMState {
     /* Regs for current mode.  */
     uint32_t regs[16];
+
+    /* Regs for A64 mode.  */
+    uint64_t xregs[31];
+    uint64_t pc;
+    uint64_t sp;
+    uint32_t pstate;
+    uint32_t aarch64_state; /* 1 if CPU is in aarch64 state */
+
     /* Frequently accessed CPSR bits are stored separately for efficiency.
        This contains all the other bits.  Use cpsr_{read,write} to access
        the whole CPSR.  */
@@ -154,6 +168,11 @@  typedef struct CPUARMState {
         uint32_t c15_power_control; /* power control */
     } cp15;
 
+    /* System registers (AArch64) */
+    struct {
+        uint64_t tpidr_el0;
+    } sr;
+
     struct {
         uint32_t other_sp;
         uint32_t vecbase;
@@ -170,7 +189,7 @@  typedef struct CPUARMState {
 
     /* VFP coprocessor state.  */
     struct {
-        float64 regs[32];
+        float64 regs[64];
 
         uint32_t xregs[16];
         /* We store these fpcsr fields separately for convenience.  */
@@ -240,6 +259,24 @@  int bank_number(int mode);
 void switch_mode(CPUARMState *, int);
 uint32_t do_arm_semihosting(CPUARMState *env);
 
+static inline bool is_a64(CPUARMState *env)
+{
+#ifdef TARGET_AARCH64
+    return true;
+#else
+    return false;
+#endif
+}
+
+#define PSTATE_N_SHIFT 3
+#define PSTATE_N  (1 << PSTATE_N_SHIFT)
+#define PSTATE_Z_SHIFT 2
+#define PSTATE_Z  (1 << PSTATE_Z_SHIFT)
+#define PSTATE_C_SHIFT 1
+#define PSTATE_C  (1 << PSTATE_C_SHIFT)
+#define PSTATE_V_SHIFT 0
+#define PSTATE_V  (1 << PSTATE_V_SHIFT)
+
 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
    is returned if the signal was handled by the virtual CPU.  */
@@ -623,8 +660,13 @@  static inline bool cp_access_ok(CPUARMState *env,
 #define TARGET_PAGE_BITS 10
 #endif
 
-#define TARGET_PHYS_ADDR_SPACE_BITS 40
-#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#if defined (TARGET_AARCH64)
+#  define TARGET_PHYS_ADDR_SPACE_BITS 64
+#  define TARGET_VIRT_ADDR_SPACE_BITS 64
+#else
+#  define TARGET_PHYS_ADDR_SPACE_BITS 40
+#  define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
 
 static inline CPUARMState *cpu_init(const char *cpu_model)
 {
@@ -661,21 +703,23 @@  static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
 #include "exec/cpu-all.h"
 
 /* Bit usage in the TB flags field: */
-#define ARM_TBFLAG_THUMB_SHIFT      0
-#define ARM_TBFLAG_THUMB_MASK       (1 << ARM_TBFLAG_THUMB_SHIFT)
-#define ARM_TBFLAG_VECLEN_SHIFT     1
-#define ARM_TBFLAG_VECLEN_MASK      (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
-#define ARM_TBFLAG_VECSTRIDE_SHIFT  4
-#define ARM_TBFLAG_VECSTRIDE_MASK   (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
-#define ARM_TBFLAG_PRIV_SHIFT       6
-#define ARM_TBFLAG_PRIV_MASK        (1 << ARM_TBFLAG_PRIV_SHIFT)
-#define ARM_TBFLAG_VFPEN_SHIFT      7
-#define ARM_TBFLAG_VFPEN_MASK       (1 << ARM_TBFLAG_VFPEN_SHIFT)
-#define ARM_TBFLAG_CONDEXEC_SHIFT   8
-#define ARM_TBFLAG_CONDEXEC_MASK    (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
-#define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
-#define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
-/* Bits 31..17 are currently unused. */
+#define ARM_TBFLAG_THUMB_SHIFT         0
+#define ARM_TBFLAG_THUMB_MASK          (1 << ARM_TBFLAG_THUMB_SHIFT)
+#define ARM_TBFLAG_VECLEN_SHIFT        1
+#define ARM_TBFLAG_VECLEN_MASK         (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
+#define ARM_TBFLAG_VECSTRIDE_SHIFT     4
+#define ARM_TBFLAG_VECSTRIDE_MASK      (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
+#define ARM_TBFLAG_PRIV_SHIFT          6
+#define ARM_TBFLAG_PRIV_MASK           (1 << ARM_TBFLAG_PRIV_SHIFT)
+#define ARM_TBFLAG_VFPEN_SHIFT         7
+#define ARM_TBFLAG_VFPEN_MASK          (1 << ARM_TBFLAG_VFPEN_SHIFT)
+#define ARM_TBFLAG_CONDEXEC_SHIFT      8
+#define ARM_TBFLAG_CONDEXEC_MASK       (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
+#define ARM_TBFLAG_BSWAP_CODE_SHIFT    16
+#define ARM_TBFLAG_BSWAP_CODE_MASK     (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_AARCH64_STATE_SHIFT 17
+#define ARM_TBFLAG_AARCH64_STATE_MASK  (1 << ARM_TBFLAG_AARCH64_STATE_SHIFT)
+/* Bits 31..18 are currently unused. */
 
 /* some convenience accessor macros */
 #define ARM_TBFLAG_THUMB(F) \
@@ -692,29 +736,37 @@  static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
     (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE(F) \
     (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_AARCH64_STATE(F) \
+    (((F) & ARM_TBFLAG_AARCH64_STATE_MASK) >> ARM_TBFLAG_AARCH64_STATE_SHIFT)
 
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
-    int privmode;
-    *pc = env->regs[15];
-    *cs_base = 0;
-    *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
-        | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
-        | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
-        | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
-        | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
-    if (arm_feature(env, ARM_FEATURE_M)) {
-        privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
+    if (is_a64(env)) {
+        *pc = env->pc;
+        *flags = env->aarch64_state << ARM_TBFLAG_AARCH64_STATE_SHIFT;
     } else {
-        privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
-    }
-    if (privmode) {
-        *flags |= ARM_TBFLAG_PRIV_MASK;
-    }
-    if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
-        *flags |= ARM_TBFLAG_VFPEN_MASK;
+        int privmode;
+        *pc = env->regs[15];
+        *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
+            | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
+            | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
+            | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
+            | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
+        if (arm_feature(env, ARM_FEATURE_M)) {
+            privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
+        } else {
+            privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
+        }
+        if (privmode) {
+            *flags |= ARM_TBFLAG_PRIV_MASK;
+        }
+        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+            *flags |= ARM_TBFLAG_VFPEN_MASK;
+        }
     }
+
+    *cs_base = 0;
 }
 
 static inline bool cpu_has_work(CPUState *cpu)
@@ -727,11 +779,15 @@  static inline bool cpu_has_work(CPUState *cpu)
 
 static inline void cpu_pc_from_tb(CPUARMState *env, TranslationBlock *tb)
 {
-    env->regs[15] = tb->pc;
+    if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
+        env->pc = tb->pc;
+    } else {
+        env->regs[15] = tb->pc;
+    }
 }
 
 /* Load an instruction and return it in the standard little-endian order */
-static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
+static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
                                     bool do_swap)
 {
     uint32_t insn = cpu_ldl_code(env, addr);
@@ -742,7 +798,7 @@  static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
 }
 
 /* Ditto, for a halfword (Thumb) instruction */
-static inline uint16_t arm_lduw_code(CPUARMState *env, uint32_t addr,
+static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
                                      bool do_swap)
 {
     uint16_t insn = cpu_lduw_code(env, addr);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 36537bd..a4b6d8a 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -9750,7 +9750,7 @@  static inline void gen_intermediate_code_internal(CPUARMState *env,
     uint16_t *gen_opc_end;
     int j, lj;
     target_ulong pc_start;
-    uint32_t next_page_start;
+    target_ulong next_page_start;
     int num_insns;
     int max_insns;
 
@@ -9838,7 +9838,7 @@  static inline void gen_intermediate_code_internal(CPUARMState *env,
         /* Intercept jump to the magic kernel page.  */
         if (dc->pc >= 0xffff0000) {
             /* We always get here via a jump, so know we are not in a
-               conditional execution block.  */
+                conditional execution block.  */
             gen_exception(EXCP_KERNEL_TRAP);
             dc->is_jmp = DISAS_UPDATE;
             break;
@@ -9846,7 +9846,7 @@  static inline void gen_intermediate_code_internal(CPUARMState *env,
 #else
         if (dc->pc >= 0xfffffff0 && IS_M(env)) {
             /* We always get here via a jump, so know we are not in a
-               conditional execution block.  */
+                conditional execution block.  */
             gen_exception(EXCP_EXCEPTION_EXIT);
             dc->is_jmp = DISAS_UPDATE;
             break;
@@ -9905,7 +9905,7 @@  static inline void gen_intermediate_code_internal(CPUARMState *env,
         }
 
         if (tcg_check_temp_count()) {
-            fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc);
+            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n", dc->pc);
         }
 
         /* Translation stops when a conditional branch is encountered.
@@ -10075,6 +10075,10 @@  void cpu_dump_state(CPUARMState *env, FILE *f, fprintf_function cpu_fprintf,
 
 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
 {
-    env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
+    if (is_a64(env)) {
+        env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    } else {
+        env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
+    }
     env->condexec_bits = gen_opc_condexec_bits[pc_pos];
 }