diff mbox

Minimal ARM LPAE support

Message ID 4F5F2D50.4030703@gmail.com
State New
Headers show

Commit Message

Alexey Starikovskiy March 13, 2012, 11:19 a.m. UTC
Minimal ARM LPAE support. Sufficient to boot Linux kernel on vexpress-a15

Signed-off-by: Alexey Starikovskiy <aystarik@gmail.com>
---
  target-arm/cpu.h       |   11 +-
  target-arm/helper.c    |  239 ++++++++++++++++++++++++++++++++++++++++++++----
  target-arm/helper.h    |    2
  target-arm/machine.c   |   16 +--
  target-arm/translate.c |   46 ++++++---
  5 files changed, 262 insertions(+), 52 deletions(-)

Comments

Mark Langsdorf March 13, 2012, 1:45 p.m. UTC | #1
On 03/13/2012 06:19 AM, Alexey Starikovskiy wrote:

The entire patch repeatedly fails scripts/checkpatch. More comments
below.

--Mark Langsdorf
Calxeda, Inc.

> Minimal ARM LPAE support. Sufficient to boot Linux kernel on vexpress-a15
> 
> Signed-off-by: Alexey Starikovskiy <aystarik@gmail.com>
> ---
>   target-arm/cpu.h       |   11 +-
>   target-arm/helper.c    |  239 ++++++++++++++++++++++++++++++++++++++++++++----
>   target-arm/helper.h    |    2
>   target-arm/machine.c   |   16 +--
>   target-arm/translate.c |   46 ++++++---
>   5 files changed, 262 insertions(+), 52 deletions(-)
> 
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index 0d9b39c..1d87c7e 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -117,11 +117,9 @@ typedef struct CPUARMState {
>           uint32_t c1_coproc; /* Coprocessor access register.  */
>           uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
>           uint32_t c1_scr; /* secure config register.  */
> -        uint32_t c2_base0; /* MMU translation table base 0.  */
> -        uint32_t c2_base1; /* MMU translation table base 1.  */
> +        uint64_t c2_base0; /* MMU translation table base 0.  */
> +        uint64_t c2_base1; /* MMU translation table base 1.  */
>           uint32_t c2_control; /* MMU translation table base control.  */
> -        uint32_t c2_mask; /* MMU translation table base selection mask.  */
> -        uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
>           uint32_t c2_data; /* MPU data cachable bits.  */
>           uint32_t c2_insn; /* MPU instruction cachable bits.  */
>           uint32_t c3; /* MMU domain access control register
> @@ -131,7 +129,7 @@ typedef struct CPUARMState {
>           uint32_t c6_region[8]; /* MPU base/size registers.  */
>           uint32_t c6_insn; /* Fault address registers.  */
>           uint32_t c6_data;
> -        uint32_t c7_par;  /* Translation result. */
> +        uint64_t c7_par;  /* Translation result. */
>           uint32_t c9_insn; /* Cache lockdown registers.  */
>           uint32_t c9_data;
>           uint32_t c9_pmcr; /* performance monitor control register */
> @@ -383,6 +381,7 @@ enum arm_features {
>       ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */
>       ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */
>       ARM_FEATURE_GENERIC_TIMER,
> +    ARM_FEATURE_LPAE, /* Large Physical Address Extension */
>   };
> 
>   static inline int arm_feature(CPUARMState *env, int feature)
> @@ -455,7 +454,7 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
>   #define cpu_signal_handler cpu_arm_signal_handler
>   #define cpu_list arm_cpu_list
> 
> -#define CPU_SAVE_VERSION 6
> +#define CPU_SAVE_VERSION 7
> 
>   /* MMU modes definitions */
>   #define MMU_MODE0_SUFFIX _kernel
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index abe1c30..a5d5e7d 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -172,6 +172,7 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
>           set_feature(env, ARM_FEATURE_ARM_DIV);
>           set_feature(env, ARM_FEATURE_V7MP);
>           set_feature(env, ARM_FEATURE_GENERIC_TIMER);
> +        set_feature(env, ARM_FEATURE_LPAE);
>           env->vfp.xregs[ARM_VFP_FPSID] = 0x410430f0;
>           env->vfp.xregs[ARM_VFP_MVFR0] = 0x10110222;
>           env->vfp.xregs[ARM_VFP_MVFR1] = 0x11111111;
> @@ -325,7 +326,6 @@ void cpu_reset(CPUARMState *env)
>           }
>       }
>       env->vfp.xregs[ARM_VFP_FPEXC] = 0;
> -    env->cp15.c2_base_mask = 0xffffc000u;
>       /* v7 performance monitor control register: same implementor
>        * field as main ID register, and we implement no event counters.
>        */
> @@ -671,6 +671,16 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
>       cpu_abort(env, "cp15 insn %08x\n", insn);
>   }
> 
> +void HELPER(set_cp15_64)(CPUState *env, uint32_t insn, uint64_t val)
> +{
> +    cpu_abort(env, "cp15 insn %08x\n", insn);
> +}
> +
> +uint64_t HELPER(get_cp15_64)(CPUState *env, uint32_t insn)
> +{
> +    cpu_abort(env, "cp15 insn %08x\n", insn);
> +}
> +
>   /* These should probably raise undefined insn exceptions.  */
>   void HELPER(v7m_msr)(CPUState *env, uint32_t reg, uint32_t val)
>   {
> @@ -1050,18 +1060,21 @@ static inline int check_ap(CPUState *env, int ap, int domain_prot,
>   static uint32_t get_level1_table_address(CPUState *env, uint32_t address)
>   {
>       uint32_t table;
> +    int t0size = env->cp15.c2_control & 0x7;
> +    uint32_t mask = ~(((uint32_t)0xffffffffu) >> t0size);
> 
> -    if (address & env->cp15.c2_mask)
> +    if (address & mask)
>           table = env->cp15.c2_base1 & 0xffffc000;
> -    else
> -        table = env->cp15.c2_base0 & env->cp15.c2_base_mask;
> -
> +    else {
> +        mask = ~((uint32_t)0x3fffu >> t0size);
> +        table = env->cp15.c2_base0 & mask;
> +    }
>       table |= (address >> 18) & 0x3ffc;
>       return table;
>   }
> 
>   static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
> -                int is_user, uint32_t *phys_ptr, int *prot,
> +                int is_user, uint64_t *phys_ptr, int *prot,
>                               target_ulong *page_size)
>   {
>       int code;
> @@ -1156,7 +1169,7 @@ do_fault:
>   }
> 
>   static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
> -                int is_user, uint32_t *phys_ptr, int *prot,
> +                int is_user, uint64_t *phys_ptr, int *prot,
>                               target_ulong *page_size)
>   {
>       int code;
> @@ -1260,7 +1273,7 @@ do_fault:
>   }
> 
>   static int get_phys_addr_mpu(CPUState *env, uint32_t address, int access_type,
> -                 int is_user, uint32_t *phys_ptr, int *prot)
> +                 int is_user, uint64_t *phys_ptr, int *prot)
>   {
>       int n;
>       uint32_t mask;
> @@ -1319,9 +1332,155 @@ static int get_phys_addr_mpu(CPUState *env, uint32_t address, int access_type,
>       return 0;
>   }
> 
> +static inline uint64_t bitrange(int m, int n) {
> +    int len = m - n + 1; // inclusive
> +    if (len < 1)
> +        return 0;
> +    return (((1ULL << len) - 1) << n);
> +}
> +
> +static int get_phys_addr_lpae(CPUState *env, uint32_t address, int access_type,
> +                            int is_user, uint64_t *phys_ptr, int *prot,
> +                            target_ulong *page_size)
> +{
> +    uint64_t base_address = 0, ia = address;
> +    bool base_found = false;
> +    bool disabled = false;
> +    int type = 1; // Translation
> +    uint32_t ttbcr = env->cp15.c2_control;
> +    uint64_t ttbr0 = env->cp15.c2_base0;
> +    uint32_t t0size = ttbcr & 0x7; //T0SZ
> +    int level = 0, start_bit;
> +    bool is_secure = 1, lookup_secure = is_secure;
> +    if (t0size || (ia & bitrange(31, 32 - t0size)) == 0) {

Won't this branch always be taken? if t0size is greater than 0,
then it passes, and if t0size is 0, bitrange returns 0 and it
passes.

> +        level = ((t0size & 6) == 0) ? 1 : 2;
> +        int lowerbound = 9 * level - t0size - 4;
> +        base_address = bitrange(39, lowerbound) & ttbr0;
> +        base_found = true;
> +        disabled = ttbcr & (1 << 7); // EPD0
> +        start_bit = 31 - t0size;
> +        //TODO unpack type info from TTBCR
> +    }
> +    uint32_t t1size = (ttbcr >> 16) & 0x7; // T1SZ
> +    uint64_t ttbr1 = env->cp15.c2_base1;

Aren't all variables supposed to be declared at the start of the
function?

> +    if ((t1size == 0 && !base_found) || (bitrange(31, 32 - t1size) & ~ia) == 0) {
> +        level = ((t1size & 6) == 0) ? 1 : 2;
> +        int lowerbound = 9 * level - t1size - 4;
> +        base_address = bitrange(39, lowerbound) & ttbr1;
> +        base_found = true;
> +        disabled = ttbcr & (1 << 23); // EPD1
> +        start_bit = 31 - t1size;
> +        //TODO unpack type info from TTBCR
> +    }

This two if-branches look similar enough that they could be turned
into one function, which would also make this entire function more
readable.

> +    if (!base_found || disabled)
> +        goto do_fault;
> +    bool first_iteration = true;
> +    bool table_rw = true;
> +    bool table_user = true;
> +    bool table_xn = false;
> +    bool table_pxn = false;
> +    uint32_t attrs = 0;
> +    bool lookup_finished;
> +    do {
> +        lookup_finished = true;
> +        bool block_translate = false;
> +        int offset = 9 * level;
> +        uint64_t ia_select;
> +        if (first_iteration)
> +            ia_select = ((ia & bitrange(start_bit, 39 - offset)) >> (39 - offset - 3));
> +        else
> +            ia_select = ((ia & bitrange(47 - offset, 39 - offset)) >> (39 - offset - 3));
> +        uint64_t lookup_address = base_address | ia_select;
> +        first_iteration = false;

    ia_select = ((ia & bitrange(start_bit, 39 - offset)) >>
                 (39 - offset - 3));
    if (first_iteration) {
        first_iteration = false;
        start_bit = 47 - offset;
    }

    might be slightly more readable.

> +        uint64_t descriptor = ldq_phys(lookup_address);
> +        if ((descriptor & 1) == 0) {
> +            goto do_fault;
> +        } else {
> +            if ((descriptor & 2) == 0) {
> +                if (level == 3) {
> +                    goto do_fault;
> +                } else
> +                    block_translate = true;
> +            } else {
> +                if (level == 3)
> +                    block_translate = true;
> +                else {
> +                    base_address = bitrange(39, 12) & descriptor;
> +                    lookup_secure = lookup_secure && !((descriptor >> 63) & 1);
> +                    table_rw = table_rw && !((descriptor >> 62) & 1);
> +                    table_user = table_user && !((descriptor >> 61) & 1);
> +                    table_xn = table_xn || ((descriptor >> 60) & 1);
> +                    table_pxn = table_pxn || ((descriptor >> 59) & 1);
> +                    lookup_finished = false;
> +                }
> +            }
> +        }
> +        if (block_translate) {
> +            *phys_ptr = (bitrange(39, 39 - offset) & descriptor) | (bitrange(38 - offset, 0) & ia);
> +            attrs = ((descriptor & bitrange(54, 52)) >> 42) |
> +                    ((descriptor & bitrange(11, 2)) >> 2);
> +            if (table_xn)
> +                attrs |= 1 << 12;
> +            if (table_pxn)
> +                attrs |= 1 << 11;
> +            if (is_secure && !lookup_secure)
> +                attrs |= 1 << 9;
> +            if (!table_rw)
> +                attrs |= 1 << 5;
> +            if (!table_user)
> +                attrs &= ~(1UL << 4);
> +            if (!lookup_secure)
> +                attrs |= 1 << 3;
> +        } else
> +            ++level;
> +    } while (!lookup_finished);

Possibly some of these magic numbers should be changed to #defines.

> +    // checks
> +    type = 2; // AccessFlag
> +    if ((attrs & (1 << 8)) == 0)
> +        goto do_fault;
> +    *prot = 0;
> +    if (((attrs >> 12) & 1) == 0)
> +        *prot |= PAGE_EXEC; // XN
> +    if (!is_user || ((attrs >> 11) & 1) == 0)
> +        *prot |= PAGE_EXEC; // PXN
> +    type = 3; // Permissions
> +    switch ((attrs >> 4) & 3) {
> +    case 0:
> +        if (is_user)
> +            goto do_fault;
> +        // fall through
> +    case 1:
> +        *prot |= PAGE_READ | PAGE_WRITE;
> +        break;
> +    case 2:
> +        if (is_user)
> +            goto do_fault;
> +        // fall through
> +    case 3:
> +        if (access_type == 1)
> +            goto do_fault;
> +        *prot |= PAGE_READ;
> +        break;
> +    }
> +    switch (level) {
> +        case 1:
> +            *page_size = 0x40000000; //1G
> +            break;
> +        case 2:
> +            *page_size = 0x200000; // 2M
> +            break;
> +        case 3:
> +            *page_size = 0x1000; // 4k
> +            break;
> +    }
> +    return 0;
> +do_fault:
> +    return (1 << 9) | (type << 2) | level; // DFSR value
> +}
> +
>   static inline int get_phys_addr(CPUState *env, uint32_t address,
>                                   int access_type, int is_user,
> -                                uint32_t *phys_ptr, int *prot,
> +                                uint64_t *phys_ptr, int *prot,
>                                   target_ulong *page_size)
>   {
>       /* Fast Context Switch Extension.  */
> @@ -1336,8 +1495,10 @@ static inline int get_phys_addr(CPUState *env, uint32_t address,
>           return 0;
>       } else if (arm_feature(env, ARM_FEATURE_MPU)) {
>           *page_size = TARGET_PAGE_SIZE;
> -    return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr,
> -                 prot);
> +        return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr, prot);
> +    } else if (env->cp15.c2_control & (1 << 31)) {
> +        return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr,
> +                                  prot, page_size);
>       } else if (env->cp15.c1_sys & (1 << 23)) {
>           return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
>                                   prot, page_size);
> @@ -1350,7 +1511,7 @@ static inline int get_phys_addr(CPUState *env, uint32_t address,
>   int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
>                                 int access_type, int mmu_idx)
>   {
> -    uint32_t phys_addr;
> +    uint64_t phys_addr;
>       target_ulong page_size;
>       int prot;
>       int ret, is_user;
> @@ -1366,7 +1527,7 @@ int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
>           return 0;
>       }
> 
> -    if (access_type == 2) {
> +    if (access_type == 2 && !arm_feature(env, ARM_FEATURE_LPAE)) {
>           env->cp15.c5_insn = ret;
>           env->cp15.c6_insn = address;
>           env->exception_index = EXCP_PREFETCH_ABORT;
> @@ -1382,7 +1543,7 @@ int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
> 
>   target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
>   {
> -    uint32_t phys_addr;
> +    uint64_t phys_addr;
>       target_ulong page_size;
>       int prot;
>       int ret;
> @@ -1529,10 +1690,7 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
>           env->cp15.c2_base1 = val;
>           break;
>           case 2:
> -                val &= 7;
>                   env->cp15.c2_control = val;
> -        env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> val);
> -                env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> val);
>           break;
>           default:
>           goto bad_reg;
> @@ -1611,7 +1769,7 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
>                   }
>                   break;
>               case 8: {
> -                uint32_t phys_addr;
> +                uint64_t phys_addr;
>                   target_ulong page_size;
>                   int prot;
>                   int ret, is_user = op2 & 2;
> @@ -2261,6 +2419,51 @@ bad_reg:
>       return 0;
>   }
> 
> +void HELPER(set_cp15_64)(CPUState *env, uint32_t insn, uint64_t val)
> +{
> +    int crm = insn & 0xf;
> +    int opc1 = (insn >> 4) & 0xf;
> +    if (crm == 2) {
> +        switch (opc1) {
> +        case 0:
> +            env->cp15.c2_base0 = val;
> +            tlb_flush(env, 0); // might change ASID
> +            return;
> +        case 1:
> +            env->cp15.c2_base1 = val;
> +            tlb_flush(env, 0); // might change ASID
> +            return;
> +        default:;
> +        }
> +    } else if (crm == 7) {
> +        if (opc1 == 0) {
> +            env->cp15.c7_par = val;
> +            return;
> +        }
> +    }
> +    cpu_abort(env, "Unimplemented cp15 register 64bit write (c%d[%d])\n", crm, opc1);
> +}
> +
> +uint64_t HELPER(get_cp15_64)(CPUState *env, uint32_t insn)
> +{
> +    int crm = insn & 0xf;
> +    int opc1 = (insn >> 4) & 0xf;
> +    if (crm == 2) {
> +        switch (opc1) {
> +        case 0:
> +            return env->cp15.c2_base0;
> +        case 1:
> +            return env->cp15.c2_base1;
> +        default:;
> +        }
> +    } else if (crm == 7) {
> +        if (opc1 == 0)
> +            return env->cp15.c7_par;
> +    }
> +    cpu_abort(env, "Unimplemented cp15 register 64bit read (c%d[%d])\n", crm, opc1);
> +    return 0;
> +}
> +
>   void HELPER(set_r13_banked)(CPUState *env, uint32_t mode, uint32_t val)
>   {
>       if ((env->uncached_cpsr & CPSR_M) == mode) {
> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index 16dd5fc..bc8151c 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -60,7 +60,9 @@ DEF_HELPER_3(v7m_msr, void, env, i32, i32)
>   DEF_HELPER_2(v7m_mrs, i32, env, i32)
> 
>   DEF_HELPER_3(set_cp15, void, env, i32, i32)
> +DEF_HELPER_3(set_cp15_64, void, env, i32, i64)
>   DEF_HELPER_2(get_cp15, i32, env, i32)
> +DEF_HELPER_2(get_cp15_64, i64, env, i32)
> 
>   DEF_HELPER_3(set_cp, void, env, i32, i32)
>   DEF_HELPER_2(get_cp, i32, env, i32)
> diff --git a/target-arm/machine.c b/target-arm/machine.c
> index f66b8df..8fa738e 100644
> --- a/target-arm/machine.c
> +++ b/target-arm/machine.c
> @@ -27,11 +27,9 @@ void cpu_save(QEMUFile *f, void *opaque)
>       qemu_put_be32(f, env->cp15.c1_coproc);
>       qemu_put_be32(f, env->cp15.c1_xscaleauxcr);
>       qemu_put_be32(f, env->cp15.c1_scr);
> -    qemu_put_be32(f, env->cp15.c2_base0);
> -    qemu_put_be32(f, env->cp15.c2_base1);
> +    qemu_put_be64(f, env->cp15.c2_base0);
> +    qemu_put_be64(f, env->cp15.c2_base1);
>       qemu_put_be32(f, env->cp15.c2_control);
> -    qemu_put_be32(f, env->cp15.c2_mask);
> -    qemu_put_be32(f, env->cp15.c2_base_mask);
>       qemu_put_be32(f, env->cp15.c2_data);
>       qemu_put_be32(f, env->cp15.c2_insn);
>       qemu_put_be32(f, env->cp15.c3);
> @@ -42,7 +40,7 @@ void cpu_save(QEMUFile *f, void *opaque)
>       }
>       qemu_put_be32(f, env->cp15.c6_insn);
>       qemu_put_be32(f, env->cp15.c6_data);
> -    qemu_put_be32(f, env->cp15.c7_par);
> +    qemu_put_be64(f, env->cp15.c7_par);
>       qemu_put_be32(f, env->cp15.c9_insn);
>       qemu_put_be32(f, env->cp15.c9_data);
>       qemu_put_be32(f, env->cp15.c9_pmcr);
> @@ -145,11 +143,9 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
>       env->cp15.c1_coproc = qemu_get_be32(f);
>       env->cp15.c1_xscaleauxcr = qemu_get_be32(f);
>       env->cp15.c1_scr = qemu_get_be32(f);
> -    env->cp15.c2_base0 = qemu_get_be32(f);
> -    env->cp15.c2_base1 = qemu_get_be32(f);
> +    env->cp15.c2_base0 = qemu_get_be64(f);
> +    env->cp15.c2_base1 = qemu_get_be64(f);
>       env->cp15.c2_control = qemu_get_be32(f);
> -    env->cp15.c2_mask = qemu_get_be32(f);
> -    env->cp15.c2_base_mask = qemu_get_be32(f);
>       env->cp15.c2_data = qemu_get_be32(f);
>       env->cp15.c2_insn = qemu_get_be32(f);
>       env->cp15.c3 = qemu_get_be32(f);
> @@ -160,7 +156,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
>       }
>       env->cp15.c6_insn = qemu_get_be32(f);
>       env->cp15.c6_data = qemu_get_be32(f);
> -    env->cp15.c7_par = qemu_get_be32(f);
> +    env->cp15.c7_par = qemu_get_be64(f);
>       env->cp15.c9_insn = qemu_get_be32(f);
>       env->cp15.c9_data = qemu_get_be32(f);
>       env->cp15.c9_pmcr = qemu_get_be32(f);
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 280bfca..1f1dde0 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -2559,17 +2559,9 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
> 
>       /* M profile cores use memory mapped registers instead of cp15.  */
>       if (arm_feature(env, ARM_FEATURE_M))
> -    return 1;
> +        return 1;
> 
> -    if ((insn & (1 << 25)) == 0) {
> -        if (insn & (1 << 20)) {
> -            /* mrrc */
> -            return 1;
> -        }
> -        /* mcrr.  Used for block cache operations, so implement as no-op.  */
> -        return 0;
> -    }
> -    if ((insn & (1 << 4)) == 0) {
> +    if ((insn & (1 << 4)) == 0 && (insn & (1 << 25))) {
>           /* cdp */
>           return 1;
>       }
> @@ -2636,16 +2628,34 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
> 
>       tmp2 = tcg_const_i32(insn);
>       if (insn & ARM_CP_RW_BIT) {
> -        tmp = tcg_temp_new_i32();
> -        gen_helper_get_cp15(tmp, cpu_env, tmp2);
> -        /* If the destination register is r15 then sets condition codes.  */
> -        if (rd != 15)
> -            store_reg(s, rd, tmp);
> -        else
> -            tcg_temp_free_i32(tmp);
> +        if ((insn & (1 << 25))) {
> +            tmp = tcg_temp_new_i32();
> +            gen_helper_get_cp15(tmp, cpu_env, tmp2);
> +            /* If the destination register is r15 then sets condition codes.  */
> +            if (rd != 15)
> +                store_reg(s, rd, tmp);
> +            else
> +                tcg_temp_free_i32(tmp);
> +        } else {
> +            int rd1 = (insn >> 16) & 0xf;
> +            TCGv_i64 tmp1 = tcg_temp_new_i64();
> +            gen_helper_get_cp15_64(tmp1, cpu_env, tmp2);
> +            tcg_gen_trunc_i64_i32(cpu_R[rd], tmp1);
> +            tcg_gen_shri_i64(tmp1, tmp1, 32);
> +            tcg_gen_trunc_i64_i32(cpu_R[rd1], tmp1);
> +            tcg_temp_free_i64(tmp1);
> +        }
>       } else {
>           tmp = load_reg(s, rd);
> -        gen_helper_set_cp15(cpu_env, tmp2, tmp);
> +        if ((insn & (1 << 25))) {
> +            gen_helper_set_cp15(cpu_env, tmp2, tmp);
> +        } else {
> +            int rd1 = (insn >> 16) & 0xf;
> +            TCGv_i64 tmp1 = tcg_temp_new_i64();
> +            tcg_gen_concat_i32_i64(tmp1, cpu_R[rd], cpu_R[rd1]);
> +            gen_helper_set_cp15_64(cpu_env, tmp2, tmp1);
> +            tcg_temp_free_i64(tmp1);
> +        }
>           tcg_temp_free_i32(tmp);
>           /* Normally we would always end the TB here, but Linux
>            * arch/arm/mach-pxa/sleep.S expects two instructions following
> 
>
Peter Maydell March 13, 2012, 2:21 p.m. UTC | #2
On 13 March 2012 11:19, Alexey Starikovskiy <aystarik@gmail.com> wrote:
> Minimal ARM LPAE support. Sufficient to boot Linux kernel on vexpress-a15

This is missing chunks of what LPAE ought to mean, which is dubious,
and it doesn't state clearly what it is missing, which is definitely
a problem.

> +void HELPER(set_cp15_64)(CPUState *env, uint32_t insn, uint64_t val)
> +{
> +    int crm = insn & 0xf;
> +    int opc1 = (insn >> 4) & 0xf;
> +    if (crm == 2) {
> +        switch (opc1) {
> +        case 0:
> +            env->cp15.c2_base0 = val;
> +            tlb_flush(env, 0); // might change ASID
> +            return;
> +        case 1:
> +            env->cp15.c2_base1 = val;
> +            tlb_flush(env, 0); // might change ASID
> +            return;
> +        default:;
> +        }
> +    } else if (crm == 7) {
> +        if (opc1 == 0) {
> +            env->cp15.c7_par = val;
> +            return;
> +        }
> +    }
> +    cpu_abort(env, "Unimplemented cp15 register 64bit write (c%d[%d])\n",
> crm, opc1);
> +}

This is aiming 64 bit cp15 handling for the same swamp in which
32 bit cp15 is currently stuck. At a bare minimum, this must do
complete decoding of all the crn/crm/opc fields, and must restrict
register implementations to only CPUs where they exist (ie check
ARM_FEATURE_LPAE).

> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -2559,17 +2559,9 @@ static int disas_cp15_insn(CPUState *env,
> DisasContext *s, uint32_t insn)
>
>     /* M profile cores use memory mapped registers instead of cp15.  */
>     if (arm_feature(env, ARM_FEATURE_M))
> -    return 1;
> +        return 1;
>
> -    if ((insn & (1 << 25)) == 0) {
> -        if (insn & (1 << 20)) {
> -            /* mrrc */
> -            return 1;
> -        }
> -        /* mcrr.  Used for block cache operations, so implement as no-op.
>  */

This change seems to drop the MCRR block cache op handling and I don't
see anything elsewhere which implements it. This will presumably break
some CPU/guest combination.

More general question -- should we move target_phys_addr_t to a 64
bit type?

-- PMM
Alexey Starikovskiy March 13, 2012, 2:51 p.m. UTC | #3
On 03/13/2012 06:21 PM, Peter Maydell wrote:
> On 13 March 2012 11:19, Alexey Starikovskiy<aystarik@gmail.com>  wrote:
>> Minimal ARM LPAE support. Sufficient to boot Linux kernel on vexpress-a15
> This is missing chunks of what LPAE ought to mean, which is dubious,
> and it doesn't state clearly what it is missing, which is definitely
> a problem.
>
I've cut out Virtualization, TrustZone, shareable/cache attributes.
Also, as you've mentioned later, phys_address is still 32 bits, so we really can't get more memory than before.
>> +void HELPER(set_cp15_64)(CPUState *env, uint32_t insn, uint64_t val)
>> +{
>> +    int crm = insn&  0xf;
>> +    int opc1 = (insn>>  4)&  0xf;
>> +    if (crm == 2) {
>> +        switch (opc1) {
>> +        case 0:
>> +            env->cp15.c2_base0 = val;
>> +            tlb_flush(env, 0); // might change ASID
>> +            return;
>> +        case 1:
>> +            env->cp15.c2_base1 = val;
>> +            tlb_flush(env, 0); // might change ASID
>> +            return;
>> +        default:;
>> +        }
>> +    } else if (crm == 7) {
>> +        if (opc1 == 0) {
>> +            env->cp15.c7_par = val;
>> +            return;
>> +        }
>> +    }
>> +    cpu_abort(env, "Unimplemented cp15 register 64bit write (c%d[%d])\n",
>> crm, opc1);
>> +}
> This is aiming 64 bit cp15 handling for the same swamp in which
> 32 bit cp15 is currently stuck. At a bare minimum, this must do
> complete decoding of all the crn/crm/opc fields, and must restrict
> register implementations to only CPUs where they exist (ie check
> ARM_FEATURE_LPAE).
>
As it is a helper for MCRR, only crm and opc1 fields are present in instruction. All allowed values are checked.
I'll need to check how MCRR to these registers will behave if LPAE is not featured
>> --- a/target-arm/translate.c
>> +++ b/target-arm/translate.c
>> @@ -2559,17 +2559,9 @@ static int disas_cp15_insn(CPUState *env,
>> DisasContext *s, uint32_t insn)
>>
>>      /* M profile cores use memory mapped registers instead of cp15.  */
>>      if (arm_feature(env, ARM_FEATURE_M))
>> -    return 1;
>> +        return 1;
>>
>> -    if ((insn&  (1<<  25)) == 0) {
>> -        if (insn&  (1<<  20)) {
>> -            /* mrrc */
>> -            return 1;
>> -        }
>> -        /* mcrr.  Used for block cache operations, so implement as no-op.
>>   */
> This change seems to drop the MCRR block cache op handling and I don't
> see anything elsewhere which implements it. This will presumably break
> some CPU/guest combination.
Do you have any pointer on that exactly did we try to emulate here?
> More general question -- should we move target_phys_addr_t to a 64
> bit type?
It could be done later on, for now it works as is. Making it 64 bit breaks DMA in pxa*, so I'm not sure how to do that properly.

Thanks,
Alex.
Peter Maydell March 13, 2012, 3:14 p.m. UTC | #4
On 13 March 2012 14:51, Alexey Starikovskiy <aystarik@gmail.com> wrote:
> On 03/13/2012 06:21 PM, Peter Maydell wrote:
>>
>> On 13 March 2012 11:19, Alexey Starikovskiy<aystarik@gmail.com>  wrote:
>>>
>>> Minimal ARM LPAE support. Sufficient to boot Linux kernel on vexpress-a15
>>
>> This is missing chunks of what LPAE ought to mean, which is dubious,
>> and it doesn't state clearly what it is missing, which is definitely
>> a problem.
>>
> I've cut out Virtualization, TrustZone, shareable/cache attributes.
> Also, as you've mentioned later, phys_address is still 32 bits, so we really
> can't get more memory than before.

I really meant, what bits of LPAE are you missing?

To pick an example entirely at random, LPAE implies that the DBGDRAR is
now 64 bits. Another: LPAE implies PXN in the short descriptor translation
table format. (PXN should have its own feature bit as it's permitted
for non-LPAE cores too.)

The ARM ARM has a convenient list in section B1.6.

> As it is a helper for MCRR, only crm and opc1 fields are present in
> instruction. All allowed values are checked.

Oops, yes. Sorry about the incorrect complaint, I'd thought MCRR had
crn as well.

>> This change seems to drop the MCRR block cache op handling and I don't
>> see anything elsewhere which implements it. This will presumably break
>> some CPU/guest combination.
>
> Do you have any pointer on that exactly did we try to emulate here?

No. You'll need to check the TRMs for every CPU core we claim to
emulate and the ARM ARM (including the ARMv6 and ARMv5 versions
as well as the current revision). Yes, this is a really painful
chore. You'll also need to try to round up some images so you can
test at least a handful of them.

(I've been ploughing through this for the 32 bit registers as part
of trying to convert them to a more data driven implementation.)

I suggest you split your patches up a bit, maybe:
 * framework for 64 bit cp15 registers
 * handle block cache ops there rather than special case
 * lpae changes

>> More general question -- should we move target_phys_addr_t to a 64
>> bit type?
>
> It could be done later on, for now it works as is. Making it 64 bit breaks
> DMA in pxa*, so I'm not sure how to do that properly.

We should probably fix anything that would break on a 64 bit
target_phys_addr_t anyway, there was a discussion a while back
about just shifting everything in qemu to 64 bit regardless.

-- PMM
Alexey Starikovskiy March 13, 2012, 3:27 p.m. UTC | #5
On 03/13/2012 07:14 PM, Peter Maydell wrote:
>>> >>  This change seems to drop the MCRR block cache op handling and I don't
>>> >>  see anything elsewhere which implements it. This will presumably break
>>> >>  some CPU/guest combination.
>> >
>> >  Do you have any pointer on that exactly did we try to emulate here?
> No. You'll need to check the TRMs for every CPU core we claim to
> emulate and the ARM ARM (including the ARMv6 and ARMv5 versions
> as well as the current revision). Yes, this is a really painful
> chore. You'll also need to try to round up some images so you can
> test at least a handful of them.
>
> (I've been ploughing through this for the 32 bit registers as part
> of trying to convert them to a more data driven implementation.)
>
There is B3.15.1 block in the ARM ARM, it says that there were no 64-bit access to system registers before LPAE and Generic Timer. Do you mean that some 64-bit system registers were defined for specific CPUs?

Thanks,
Alex.
Peter Maydell March 13, 2012, 3:50 p.m. UTC | #6
On 13 March 2012 15:27, Alexey Starikovskiy <aystarik@gmail.com> wrote:
> There is B3.15.1 block in the ARM ARM, it says that there were no 64-bit
> access to system registers before LPAE and Generic Timer. Do you mean that
> some 64-bit system registers were defined for specific CPUs?

Yes (for instance, see the 1136 TRM, and the ARMv6 ARM ARM section B6.6.6.)
This is why I said you have to read all the TRMs and the previous
versions of the architecture manual...

-- PMM
Alexey Starikovskiy March 13, 2012, 5:21 p.m. UTC | #7
Minimal ARM LPAE support. Sufficient to boot Linux kernel on vexpress-a15

Missing:
* Extends the DBGDRAR and DBGDSAR to 64 bits, to hold PAs of up to 40 bits.
* Defines two Memory Attribute Indirection Registers, MAIRn, to replace PRRR and NMRR when
using the Long-descriptor translation table format.
* Provides two IMPLEMENTATION DEFINED Auxiliary Memory Attribute Indirection Registers 0
AMAIRn.

Does not change behaviour of Short-descriptor format (AKA _v6)
diff mbox

Patch

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 0d9b39c..1d87c7e 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -117,11 +117,9 @@  typedef struct CPUARMState {
          uint32_t c1_coproc; /* Coprocessor access register.  */
          uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
          uint32_t c1_scr; /* secure config register.  */
-        uint32_t c2_base0; /* MMU translation table base 0.  */
-        uint32_t c2_base1; /* MMU translation table base 1.  */
+        uint64_t c2_base0; /* MMU translation table base 0.  */
+        uint64_t c2_base1; /* MMU translation table base 1.  */
          uint32_t c2_control; /* MMU translation table base control.  */
-        uint32_t c2_mask; /* MMU translation table base selection mask.  */
-        uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
          uint32_t c2_data; /* MPU data cachable bits.  */
          uint32_t c2_insn; /* MPU instruction cachable bits.  */
          uint32_t c3; /* MMU domain access control register
@@ -131,7 +129,7 @@  typedef struct CPUARMState {
          uint32_t c6_region[8]; /* MPU base/size registers.  */
          uint32_t c6_insn; /* Fault address registers.  */
          uint32_t c6_data;
-        uint32_t c7_par;  /* Translation result. */
+        uint64_t c7_par;  /* Translation result. */
          uint32_t c9_insn; /* Cache lockdown registers.  */
          uint32_t c9_data;
          uint32_t c9_pmcr; /* performance monitor control register */
@@ -383,6 +381,7 @@  enum arm_features {
      ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */
      ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */
      ARM_FEATURE_GENERIC_TIMER,
+    ARM_FEATURE_LPAE, /* Large Physical Address Extension */
  };

  static inline int arm_feature(CPUARMState *env, int feature)
@@ -455,7 +454,7 @@  void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
  #define cpu_signal_handler cpu_arm_signal_handler
  #define cpu_list arm_cpu_list

-#define CPU_SAVE_VERSION 6
+#define CPU_SAVE_VERSION 7

  /* MMU modes definitions */
  #define MMU_MODE0_SUFFIX _kernel
diff --git a/target-arm/helper.c b/target-arm/helper.c
index abe1c30..a5d5e7d 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -172,6 +172,7 @@  static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
          set_feature(env, ARM_FEATURE_ARM_DIV);
          set_feature(env, ARM_FEATURE_V7MP);
          set_feature(env, ARM_FEATURE_GENERIC_TIMER);
+        set_feature(env, ARM_FEATURE_LPAE);
          env->vfp.xregs[ARM_VFP_FPSID] = 0x410430f0;
          env->vfp.xregs[ARM_VFP_MVFR0] = 0x10110222;
          env->vfp.xregs[ARM_VFP_MVFR1] = 0x11111111;
@@ -325,7 +326,6 @@  void cpu_reset(CPUARMState *env)
          }
      }
      env->vfp.xregs[ARM_VFP_FPEXC] = 0;
-    env->cp15.c2_base_mask = 0xffffc000u;
      /* v7 performance monitor control register: same implementor
       * field as main ID register, and we implement no event counters.
       */
@@ -671,6 +671,16 @@  uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
      cpu_abort(env, "cp15 insn %08x\n", insn);
  }

+void HELPER(set_cp15_64)(CPUState *env, uint32_t insn, uint64_t val)
+{
+    cpu_abort(env, "cp15 insn %08x\n", insn);
+}
+
+uint64_t HELPER(get_cp15_64)(CPUState *env, uint32_t insn)
+{
+    cpu_abort(env, "cp15 insn %08x\n", insn);
+}
+
  /* These should probably raise undefined insn exceptions.  */
  void HELPER(v7m_msr)(CPUState *env, uint32_t reg, uint32_t val)
  {
@@ -1050,18 +1060,21 @@  static inline int check_ap(CPUState *env, int ap, int domain_prot,
  static uint32_t get_level1_table_address(CPUState *env, uint32_t address)
  {
      uint32_t table;
+    int t0size = env->cp15.c2_control & 0x7;
+    uint32_t mask = ~(((uint32_t)0xffffffffu) >> t0size);

-    if (address & env->cp15.c2_mask)
+    if (address & mask)
          table = env->cp15.c2_base1 & 0xffffc000;
-    else
-        table = env->cp15.c2_base0 & env->cp15.c2_base_mask;
-
+    else {
+        mask = ~((uint32_t)0x3fffu >> t0size);
+        table = env->cp15.c2_base0 & mask;
+    }
      table |= (address >> 18) & 0x3ffc;
      return table;
  }

  static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
-                int is_user, uint32_t *phys_ptr, int *prot,
+                int is_user, uint64_t *phys_ptr, int *prot,
                              target_ulong *page_size)
  {
      int code;
@@ -1156,7 +1169,7 @@  do_fault:
  }

  static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
-                int is_user, uint32_t *phys_ptr, int *prot,
+                int is_user, uint64_t *phys_ptr, int *prot,
                              target_ulong *page_size)
  {
      int code;
@@ -1260,7 +1273,7 @@  do_fault:
  }

  static int get_phys_addr_mpu(CPUState *env, uint32_t address, int access_type,
-                 int is_user, uint32_t *phys_ptr, int *prot)
+                 int is_user, uint64_t *phys_ptr, int *prot)
  {
      int n;
      uint32_t mask;
@@ -1319,9 +1332,155 @@  static int get_phys_addr_mpu(CPUState *env, uint32_t address, int access_type,
      return 0;
  }

+static inline uint64_t bitrange(int m, int n) {
+    int len = m - n + 1; // inclusive
+    if (len < 1)
+        return 0;
+    return (((1ULL << len) - 1) << n);
+}
+
+static int get_phys_addr_lpae(CPUState *env, uint32_t address, int access_type,
+                            int is_user, uint64_t *phys_ptr, int *prot,
+                            target_ulong *page_size)
+{
+    uint64_t base_address = 0, ia = address;
+    bool base_found = false;
+    bool disabled = false;
+    int type = 1; // Translation
+    uint32_t ttbcr = env->cp15.c2_control;
+    uint64_t ttbr0 = env->cp15.c2_base0;
+    uint32_t t0size = ttbcr & 0x7; //T0SZ
+    int level = 0, start_bit;
+    bool is_secure = 1, lookup_secure = is_secure;
+    if (t0size || (ia & bitrange(31, 32 - t0size)) == 0) {
+        level = ((t0size & 6) == 0) ? 1 : 2;
+        int lowerbound = 9 * level - t0size - 4;
+        base_address = bitrange(39, lowerbound) & ttbr0;
+        base_found = true;
+        disabled = ttbcr & (1 << 7); // EPD0
+        start_bit = 31 - t0size;
+        //TODO unpack type info from TTBCR
+    }
+    uint32_t t1size = (ttbcr >> 16) & 0x7; // T1SZ
+    uint64_t ttbr1 = env->cp15.c2_base1;
+    if ((t1size == 0 && !base_found) || (bitrange(31, 32 - t1size) & ~ia) == 0) {
+        level = ((t1size & 6) == 0) ? 1 : 2;
+        int lowerbound = 9 * level - t1size - 4;
+        base_address = bitrange(39, lowerbound) & ttbr1;
+        base_found = true;
+        disabled = ttbcr & (1 << 23); // EPD1
+        start_bit = 31 - t1size;
+        //TODO unpack type info from TTBCR
+    }
+    if (!base_found || disabled)
+        goto do_fault;
+    bool first_iteration = true;
+    bool table_rw = true;
+    bool table_user = true;
+    bool table_xn = false;
+    bool table_pxn = false;
+    uint32_t attrs = 0;
+    bool lookup_finished;
+    do {
+        lookup_finished = true;
+        bool block_translate = false;
+        int offset = 9 * level;
+        uint64_t ia_select;
+        if (first_iteration)
+            ia_select = ((ia & bitrange(start_bit, 39 - offset)) >> (39 - offset - 3));
+        else
+            ia_select = ((ia & bitrange(47 - offset, 39 - offset)) >> (39 - offset - 3));
+        uint64_t lookup_address = base_address | ia_select;
+        first_iteration = false;
+        uint64_t descriptor = ldq_phys(lookup_address);
+        if ((descriptor & 1) == 0) {
+            goto do_fault;
+        } else {
+            if ((descriptor & 2) == 0) {
+                if (level == 3) {
+                    goto do_fault;
+                } else
+                    block_translate = true;
+            } else {
+                if (level == 3)
+                    block_translate = true;
+                else {
+                    base_address = bitrange(39, 12) & descriptor;
+                    lookup_secure = lookup_secure && !((descriptor >> 63) & 1);
+                    table_rw = table_rw && !((descriptor >> 62) & 1);
+                    table_user = table_user && !((descriptor >> 61) & 1);
+                    table_xn = table_xn || ((descriptor >> 60) & 1);
+                    table_pxn = table_pxn || ((descriptor >> 59) & 1);
+                    lookup_finished = false;
+                }
+            }
+        }
+        if (block_translate) {
+            *phys_ptr = (bitrange(39, 39 - offset) & descriptor) | (bitrange(38 - offset, 0) & ia);
+            attrs = ((descriptor & bitrange(54, 52)) >> 42) |
+                    ((descriptor & bitrange(11, 2)) >> 2);
+            if (table_xn)
+                attrs |= 1 << 12;
+            if (table_pxn)
+                attrs |= 1 << 11;
+            if (is_secure && !lookup_secure)
+                attrs |= 1 << 9;
+            if (!table_rw)
+                attrs |= 1 << 5;
+            if (!table_user)
+                attrs &= ~(1UL << 4);
+            if (!lookup_secure)
+                attrs |= 1 << 3;
+        } else
+            ++level;
+    } while (!lookup_finished);
+    // checks
+    type = 2; // AccessFlag
+    if ((attrs & (1 << 8)) == 0)
+        goto do_fault;
+    *prot = 0;
+    if (((attrs >> 12) & 1) == 0)
+        *prot |= PAGE_EXEC; // XN
+    if (!is_user || ((attrs >> 11) & 1) == 0)
+        *prot |= PAGE_EXEC; // PXN
+    type = 3; // Permissions
+    switch ((attrs >> 4) & 3) {
+    case 0:
+        if (is_user)
+            goto do_fault;
+        // fall through
+    case 1:
+        *prot |= PAGE_READ | PAGE_WRITE;
+        break;
+    case 2:
+        if (is_user)
+            goto do_fault;
+        // fall through
+    case 3:
+        if (access_type == 1)
+            goto do_fault;
+        *prot |= PAGE_READ;
+        break;
+    }
+    switch (level) {
+        case 1:
+            *page_size = 0x40000000; //1G
+            break;
+        case 2:
+            *page_size = 0x200000; // 2M
+            break;
+        case 3:
+            *page_size = 0x1000; // 4k
+            break;
+    }
+    return 0;
+do_fault:
+    return (1 << 9) | (type << 2) | level; // DFSR value
+}
+
  static inline int get_phys_addr(CPUState *env, uint32_t address,
                                  int access_type, int is_user,
-                                uint32_t *phys_ptr, int *prot,
+                                uint64_t *phys_ptr, int *prot,
                                  target_ulong *page_size)
  {
      /* Fast Context Switch Extension.  */
@@ -1336,8 +1495,10 @@  static inline int get_phys_addr(CPUState *env, uint32_t address,
          return 0;
      } else if (arm_feature(env, ARM_FEATURE_MPU)) {
          *page_size = TARGET_PAGE_SIZE;
-    return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr,
-                 prot);
+        return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr, prot);
+    } else if (env->cp15.c2_control & (1 << 31)) {
+        return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr,
+                                  prot, page_size);
      } else if (env->cp15.c1_sys & (1 << 23)) {
          return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
                                  prot, page_size);
@@ -1350,7 +1511,7 @@  static inline int get_phys_addr(CPUState *env, uint32_t address,
  int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
                                int access_type, int mmu_idx)
  {
-    uint32_t phys_addr;
+    uint64_t phys_addr;
      target_ulong page_size;
      int prot;
      int ret, is_user;
@@ -1366,7 +1527,7 @@  int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
          return 0;
      }

-    if (access_type == 2) {
+    if (access_type == 2 && !arm_feature(env, ARM_FEATURE_LPAE)) {
          env->cp15.c5_insn = ret;
          env->cp15.c6_insn = address;
          env->exception_index = EXCP_PREFETCH_ABORT;
@@ -1382,7 +1543,7 @@  int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,

  target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
  {
-    uint32_t phys_addr;
+    uint64_t phys_addr;
      target_ulong page_size;
      int prot;
      int ret;
@@ -1529,10 +1690,7 @@  void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
          env->cp15.c2_base1 = val;
          break;
          case 2:
-                val &= 7;
                  env->cp15.c2_control = val;
-        env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> val);
-                env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> val);
          break;
          default:
          goto bad_reg;
@@ -1611,7 +1769,7 @@  void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
                  }
                  break;
              case 8: {
-                uint32_t phys_addr;
+                uint64_t phys_addr;
                  target_ulong page_size;
                  int prot;
                  int ret, is_user = op2 & 2;
@@ -2261,6 +2419,51 @@  bad_reg:
      return 0;
  }

+void HELPER(set_cp15_64)(CPUState *env, uint32_t insn, uint64_t val)
+{
+    int crm = insn & 0xf;
+    int opc1 = (insn >> 4) & 0xf;
+    if (crm == 2) {
+        switch (opc1) {
+        case 0:
+            env->cp15.c2_base0 = val;
+            tlb_flush(env, 0); // might change ASID
+            return;
+        case 1:
+            env->cp15.c2_base1 = val;
+            tlb_flush(env, 0); // might change ASID
+            return;
+        default:;
+        }
+    } else if (crm == 7) {
+        if (opc1 == 0) {
+            env->cp15.c7_par = val;
+            return;
+        }
+    }
+    cpu_abort(env, "Unimplemented cp15 register 64bit write (c%d[%d])\n", crm, opc1);
+}
+
+uint64_t HELPER(get_cp15_64)(CPUState *env, uint32_t insn)
+{
+    int crm = insn & 0xf;
+    int opc1 = (insn >> 4) & 0xf;
+    if (crm == 2) {
+        switch (opc1) {
+        case 0:
+            return env->cp15.c2_base0;
+        case 1:
+            return env->cp15.c2_base1;
+        default:;
+        }
+    } else if (crm == 7) {
+        if (opc1 == 0)
+            return env->cp15.c7_par;
+    }
+    cpu_abort(env, "Unimplemented cp15 register 64bit read (c%d[%d])\n", crm, opc1);
+    return 0;
+}
+
  void HELPER(set_r13_banked)(CPUState *env, uint32_t mode, uint32_t val)
  {
      if ((env->uncached_cpsr & CPSR_M) == mode) {
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 16dd5fc..bc8151c 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -60,7 +60,9 @@  DEF_HELPER_3(v7m_msr, void, env, i32, i32)
  DEF_HELPER_2(v7m_mrs, i32, env, i32)

  DEF_HELPER_3(set_cp15, void, env, i32, i32)
+DEF_HELPER_3(set_cp15_64, void, env, i32, i64)
  DEF_HELPER_2(get_cp15, i32, env, i32)
+DEF_HELPER_2(get_cp15_64, i64, env, i32)

  DEF_HELPER_3(set_cp, void, env, i32, i32)
  DEF_HELPER_2(get_cp, i32, env, i32)
diff --git a/target-arm/machine.c b/target-arm/machine.c
index f66b8df..8fa738e 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -27,11 +27,9 @@  void cpu_save(QEMUFile *f, void *opaque)
      qemu_put_be32(f, env->cp15.c1_coproc);
      qemu_put_be32(f, env->cp15.c1_xscaleauxcr);
      qemu_put_be32(f, env->cp15.c1_scr);
-    qemu_put_be32(f, env->cp15.c2_base0);
-    qemu_put_be32(f, env->cp15.c2_base1);
+    qemu_put_be64(f, env->cp15.c2_base0);
+    qemu_put_be64(f, env->cp15.c2_base1);
      qemu_put_be32(f, env->cp15.c2_control);
-    qemu_put_be32(f, env->cp15.c2_mask);
-    qemu_put_be32(f, env->cp15.c2_base_mask);
      qemu_put_be32(f, env->cp15.c2_data);
      qemu_put_be32(f, env->cp15.c2_insn);
      qemu_put_be32(f, env->cp15.c3);
@@ -42,7 +40,7 @@  void cpu_save(QEMUFile *f, void *opaque)
      }
      qemu_put_be32(f, env->cp15.c6_insn);
      qemu_put_be32(f, env->cp15.c6_data);
-    qemu_put_be32(f, env->cp15.c7_par);
+    qemu_put_be64(f, env->cp15.c7_par);
      qemu_put_be32(f, env->cp15.c9_insn);
      qemu_put_be32(f, env->cp15.c9_data);
      qemu_put_be32(f, env->cp15.c9_pmcr);
@@ -145,11 +143,9 @@  int cpu_load(QEMUFile *f, void *opaque, int version_id)
      env->cp15.c1_coproc = qemu_get_be32(f);
      env->cp15.c1_xscaleauxcr = qemu_get_be32(f);
      env->cp15.c1_scr = qemu_get_be32(f);
-    env->cp15.c2_base0 = qemu_get_be32(f);
-    env->cp15.c2_base1 = qemu_get_be32(f);
+    env->cp15.c2_base0 = qemu_get_be64(f);
+    env->cp15.c2_base1 = qemu_get_be64(f);
      env->cp15.c2_control = qemu_get_be32(f);
-    env->cp15.c2_mask = qemu_get_be32(f);
-    env->cp15.c2_base_mask = qemu_get_be32(f);
      env->cp15.c2_data = qemu_get_be32(f);
      env->cp15.c2_insn = qemu_get_be32(f);
      env->cp15.c3 = qemu_get_be32(f);
@@ -160,7 +156,7 @@  int cpu_load(QEMUFile *f, void *opaque, int version_id)
      }
      env->cp15.c6_insn = qemu_get_be32(f);
      env->cp15.c6_data = qemu_get_be32(f);
-    env->cp15.c7_par = qemu_get_be32(f);
+    env->cp15.c7_par = qemu_get_be64(f);
      env->cp15.c9_insn = qemu_get_be32(f);
      env->cp15.c9_data = qemu_get_be32(f);
      env->cp15.c9_pmcr = qemu_get_be32(f);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 280bfca..1f1dde0 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2559,17 +2559,9 @@  static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)

      /* M profile cores use memory mapped registers instead of cp15.  */
      if (arm_feature(env, ARM_FEATURE_M))
-    return 1;
+        return 1;

-    if ((insn & (1 << 25)) == 0) {
-        if (insn & (1 << 20)) {
-            /* mrrc */
-            return 1;
-        }
-        /* mcrr.  Used for block cache operations, so implement as no-op.  */
-        return 0;
-    }
-    if ((insn & (1 << 4)) == 0) {
+    if ((insn & (1 << 4)) == 0 && (insn & (1 << 25))) {
          /* cdp */
          return 1;
      }
@@ -2636,16 +2628,34 @@  static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)

      tmp2 = tcg_const_i32(insn);
      if (insn & ARM_CP_RW_BIT) {
-        tmp = tcg_temp_new_i32();
-        gen_helper_get_cp15(tmp, cpu_env, tmp2);
-        /* If the destination register is r15 then sets condition codes.  */
-        if (rd != 15)
-            store_reg(s, rd, tmp);
-        else
-            tcg_temp_free_i32(tmp);
+        if ((insn & (1 << 25))) {
+            tmp = tcg_temp_new_i32();
+            gen_helper_get_cp15(tmp, cpu_env, tmp2);
+            /* If the destination register is r15 then sets condition codes.  */
+            if (rd != 15)
+                store_reg(s, rd, tmp);
+            else
+                tcg_temp_free_i32(tmp);
+        } else {
+            int rd1 = (insn >> 16) & 0xf;
+            TCGv_i64 tmp1 = tcg_temp_new_i64();
+            gen_helper_get_cp15_64(tmp1, cpu_env, tmp2);
+            tcg_gen_trunc_i64_i32(cpu_R[rd], tmp1);
+            tcg_gen_shri_i64(tmp1, tmp1, 32);
+            tcg_gen_trunc_i64_i32(cpu_R[rd1], tmp1);
+            tcg_temp_free_i64(tmp1);
+        }
      } else {
          tmp = load_reg(s, rd);
-        gen_helper_set_cp15(cpu_env, tmp2, tmp);
+        if ((insn & (1 << 25))) {
+            gen_helper_set_cp15(cpu_env, tmp2, tmp);
+        } else {
+            int rd1 = (insn >> 16) & 0xf;
+            TCGv_i64 tmp1 = tcg_temp_new_i64();
+            tcg_gen_concat_i32_i64(tmp1, cpu_R[rd], cpu_R[rd1]);
+            gen_helper_set_cp15_64(cpu_env, tmp2, tmp1);
+            tcg_temp_free_i64(tmp1);
+        }
          tcg_temp_free_i32(tmp);
          /* Normally we would always end the TB here, but Linux
           * arch/arm/mach-pxa/sleep.S expects two instructions following