Patchwork target-arm: add support for v8 AES instructions

login
register
mail settings
Submitter Ard Biesheuvel
Date Nov. 6, 2013, 2:21 p.m.
Message ID <1383747715-8441-1-git-send-email-ard.biesheuvel@linaro.org>
Download mbox | patch
Permalink /patch/288926/
State New
Headers show

Comments

Ard Biesheuvel - Nov. 6, 2013, 2:21 p.m.
This adds support for the AESE/AESD/AESMC/AESIMC instructions that
are available on some v8 implementations of Aarch32.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 target-arm/Makefile.objs   |   1 +
 target-arm/cpu.c           |   1 +
 target-arm/cpu.h           |   1 +
 target-arm/crypto_helper.c | 172 +++++++++++++++++++++++++++++++++++++++++++++
 target-arm/helper.h        |   3 +
 target-arm/translate.c     |  18 +++++
 6 files changed, 196 insertions(+)
 create mode 100644 target-arm/crypto_helper.c
Ard Biesheuvel - Nov. 14, 2013, 8:32 a.m.
Ping?

Regards,
Ard.



On 6 November 2013 15:21, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> This adds support for the AESE/AESD/AESMC/AESIMC instructions that
> are available on some v8 implementations of Aarch32.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  target-arm/Makefile.objs   |   1 +
>  target-arm/cpu.c           |   1 +
>  target-arm/cpu.h           |   1 +
>  target-arm/crypto_helper.c | 172 +++++++++++++++++++++++++++++++++++++++++++++
>  target-arm/helper.h        |   3 +
>  target-arm/translate.c     |  18 +++++
>  6 files changed, 196 insertions(+)
>  create mode 100644 target-arm/crypto_helper.c
>
> diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
> index 2d9f77f..6840f65 100644
> --- a/target-arm/Makefile.objs
> +++ b/target-arm/Makefile.objs
> @@ -5,3 +5,4 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
>  obj-y += translate.o op_helper.o helper.o cpu.o
>  obj-y += neon_helper.o iwmmxt_helper.o
>  obj-y += gdbstub.o
> +obj-y += crypto_helper.o
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index f0ed62f..4ba3246 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -169,6 +169,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>          set_feature(env, ARM_FEATURE_V7);
>          set_feature(env, ARM_FEATURE_ARM_DIV);
>          set_feature(env, ARM_FEATURE_LPAE);
> +        set_feature(env, ARM_FEATURE_V8_AES);
>      }
>      if (arm_feature(env, ARM_FEATURE_V7)) {
>          set_feature(env, ARM_FEATURE_VAPA);
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index a44d56f..8e4593b 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -395,6 +395,7 @@ enum arm_features {
>      ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
>      ARM_FEATURE_V8,
>      ARM_FEATURE_TRUSTZONE, /* TrustZone Security Extensions. */
> +    ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
>  };
>
>  static inline int arm_feature(CPUARMState *env, int feature)
> diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
> new file mode 100644
> index 0000000..f4b633f
> --- /dev/null
> +++ b/target-arm/crypto_helper.c
> @@ -0,0 +1,172 @@
> +/*
> + * crypto_helper.c - emulate v8 Crypto Extensions instructions
> + *
> + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <stdlib.h>
> +
> +#include "cpu.h"
> +#include "exec/exec-all.h"
> +#include "helper.h"
> +
> +union AES_STATE {
> +    uint8_t    bytes[16];
> +    uint64_t   l[2];
> +};
> +
> +static void add_sub_shift(union AES_STATE *st, union AES_STATE *rk, int inv);
> +static void mix_columns(union AES_STATE *out, union AES_STATE *in);
> +static void inv_mix_columns_pre(union AES_STATE *out);
> +
> +void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)
> +{
> +    union AES_STATE sm = { .l = {
> +        float64_val(env->vfp.regs[rm]),
> +        float64_val(env->vfp.regs[rm + 1])
> +    } };
> +    union AES_STATE sd = { .l = {
> +        float64_val(env->vfp.regs[rd]),
> +        float64_val(env->vfp.regs[rd + 1])
> +    } };
> +
> +    add_sub_shift(&sd, &sm, e);
> +
> +    env->vfp.regs[rd] = make_float64(sd.l[0]);
> +    env->vfp.regs[rd + 1] = make_float64(sd.l[1]);
> +}
> +
> +void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)
> +{
> +    union AES_STATE sd, sm = { .l = {
> +        float64_val(env->vfp.regs[rm]),
> +        float64_val(env->vfp.regs[rm + 1])
> +    } };
> +
> +    if (e) {
> +        inv_mix_columns_pre(&sm);
> +    }
> +    mix_columns(&sd, &sm);
> +
> +    env->vfp.regs[rd] = make_float64(sd.l[0]);
> +    env->vfp.regs[rd + 1] = make_float64(sd.l[1]);
> +}
> +
> +#define gf8_mul_x(a) \
> +    (((a) << 1) ^ (((a) & 0x80) ? 0x1b : 0))
> +
> +static void mix_columns(union AES_STATE *out, union AES_STATE *in)
> +{
> +    int i;
> +
> +    for (i = 0; i < 16; i++) {
> +        out->bytes[i] =
> +            gf8_mul_x(in->bytes[i]) ^
> +            gf8_mul_x(in->bytes[((i + 1) % 4) | (i & ~3)]) ^
> +                in->bytes[((i + 1) % 4) | (i & ~3)] ^
> +                in->bytes[((i + 2) % 4) | (i & ~3)] ^
> +                in->bytes[((i + 3) % 4) | (i & ~3)];
> +    }
> +}
> +
> +#define gf8_mul_x2(a) \
> +    (((a) << 2) ^ (((a) & 0x80) ? 0x36 : 0) ^ (((a) & 0x40) ? 0x1b : 0))
> +
> +static void inv_mix_columns_pre(union AES_STATE *out)
> +{
> +    union AES_STATE in = *out;
> +    int i;
> +
> +    for (i = 0; i < 16; i++) {
> +        out->bytes[i] = gf8_mul_x2(in.bytes[i]) ^ in.bytes[i] ^
> +                gf8_mul_x2(in.bytes[i ^ 2]);
> +    }
> +}
> +
> +static void add_sub_shift(union AES_STATE *st, union AES_STATE *rk, int inv)
> +{
> +    static uint8_t const sbox[][256] = { {
> +        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
> +        0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
> +        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
> +        0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
> +        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
> +        0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
> +        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
> +        0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
> +        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
> +        0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
> +        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
> +        0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
> +        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
> +        0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
> +        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
> +        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
> +        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
> +        0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
> +        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
> +        0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
> +        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
> +        0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
> +        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
> +        0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
> +        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
> +        0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
> +        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
> +        0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
> +        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
> +        0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
> +        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
> +        0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
> +    }, {
> +        0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
> +        0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
> +        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
> +        0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
> +        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
> +        0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
> +        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
> +        0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
> +        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
> +        0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
> +        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
> +        0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
> +        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
> +        0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
> +        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
> +        0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
> +        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
> +        0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
> +        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
> +        0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
> +        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
> +        0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
> +        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
> +        0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
> +        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
> +        0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
> +        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
> +        0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
> +        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
> +        0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
> +        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
> +        0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
> +    } };
> +    static uint8_t const permute[][16] = {
> +        { 0,  5, 10, 15, 4, 9, 14,  3, 8, 13, 2,  7, 12, 1, 6, 11 },
> +        { 0, 13, 10,  7, 4, 1, 14, 11, 8,  5, 2, 15, 12, 9, 6,  3 },
> +    };
> +    int i;
> +
> +    rk->l[0] ^= st->l[0];
> +    rk->l[1] ^= st->l[1];
> +
> +    for (i = 0; i < 16; i++) {
> +        st->bytes[i] = sbox[inv][rk->bytes[permute[inv][i]]];
> +    }
> +}
> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index 63ae13a..a42f550 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -458,4 +458,7 @@ DEF_HELPER_3(neon_qzip8, void, env, i32, i32)
>  DEF_HELPER_3(neon_qzip16, void, env, i32, i32)
>  DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
>
> +DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
> +DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
> +
>  #include "exec/def-helper.h"
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 9da4ea1..c147491 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4346,6 +4346,8 @@ static const uint8_t neon_3r_sizes[] = {
>  #define NEON_2RM_VREV16 2
>  #define NEON_2RM_VPADDL 4
>  #define NEON_2RM_VPADDL_U 5
> +#define NEON_2RM_AESE 6 /* Includes AESD */
> +#define NEON_2RM_AESMC 7 /* Includes AESIMC */
>  #define NEON_2RM_VCLS 8
>  #define NEON_2RM_VCLZ 9
>  #define NEON_2RM_VCNT 10
> @@ -4403,6 +4405,8 @@ static const uint8_t neon_2rm_sizes[] = {
>      [NEON_2RM_VREV16] = 0x1,
>      [NEON_2RM_VPADDL] = 0x7,
>      [NEON_2RM_VPADDL_U] = 0x7,
> +    [NEON_2RM_AESE] = 0x1,
> +    [NEON_2RM_AESMC] = 0x1,
>      [NEON_2RM_VCLS] = 0x7,
>      [NEON_2RM_VCLZ] = 0x7,
>      [NEON_2RM_VCNT] = 0x1,
> @@ -5925,6 +5929,20 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
>                      tcg_temp_free_i32(tmp2);
>                      tcg_temp_free_i32(tmp3);
>                      break;
> +                case NEON_2RM_AESE: case NEON_2RM_AESMC:
> +                    if (!arm_feature(env, ARM_FEATURE_V8_AES)) {
> +                        return 1;
> +                    }
> +                    tmp = tcg_const_i32(rd);
> +                    tmp2 = tcg_const_i32(rm);
> +                    tmp3 = tcg_const_i32((insn >> 6) & 1);
> +
> +                    if (op == NEON_2RM_AESE) {
> +                        gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
> +                    } else {
> +                        gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
> +                    }
> +                    break;
>                  default:
>                  elementwise:
>                      for (pass = 0; pass < (q ? 4 : 2); pass++) {
> --
> 1.8.3.2
>
Peter Maydell - Dec. 2, 2013, 3:06 p.m.
On 6 November 2013 14:21, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> This adds support for the AESE/AESD/AESMC/AESIMC instructions that
> are available on some v8 implementations of Aarch32.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Hi; thanks for this patch. I have a few minor review comments,
but it looks pretty good.

(Do you have any plans to do the remaining instructions
(SHA1, SHA256, VMULL)? Not a requirement, but it would be
nice to know for planning purposes :-))

> ---
>  target-arm/Makefile.objs   |   1 +
>  target-arm/cpu.c           |   1 +
>  target-arm/cpu.h           |   1 +
>  target-arm/crypto_helper.c | 172 +++++++++++++++++++++++++++++++++++++++++++++
>  target-arm/helper.h        |   3 +
>  target-arm/translate.c     |  18 +++++
>  6 files changed, 196 insertions(+)
>  create mode 100644 target-arm/crypto_helper.c
>
> diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
> index 2d9f77f..6840f65 100644
> --- a/target-arm/Makefile.objs
> +++ b/target-arm/Makefile.objs
> @@ -5,3 +5,4 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
>  obj-y += translate.o op_helper.o helper.o cpu.o
>  obj-y += neon_helper.o iwmmxt_helper.o
>  obj-y += gdbstub.o
> +obj-y += crypto_helper.o
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index f0ed62f..4ba3246 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -169,6 +169,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>          set_feature(env, ARM_FEATURE_V7);
>          set_feature(env, ARM_FEATURE_ARM_DIV);
>          set_feature(env, ARM_FEATURE_LPAE);
> +        set_feature(env, ARM_FEATURE_V8_AES);
>      }
>      if (arm_feature(env, ARM_FEATURE_V7)) {
>          set_feature(env, ARM_FEATURE_VAPA);
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index a44d56f..8e4593b 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -395,6 +395,7 @@ enum arm_features {
>      ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
>      ARM_FEATURE_V8,
>      ARM_FEATURE_TRUSTZONE, /* TrustZone Security Extensions. */
> +    ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */

It looks like you've maybe based this on the wrong tree? I get
conflicts trying to apply it to qemu upstream master because
master doesn't have ARM_FEATURE_TRUSTZONE.

>  };
>
>  static inline int arm_feature(CPUARMState *env, int feature)
> diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
> new file mode 100644
> index 0000000..f4b633f
> --- /dev/null
> +++ b/target-arm/crypto_helper.c
> @@ -0,0 +1,172 @@
> +/*
> + * crypto_helper.c - emulate v8 Crypto Extensions instructions
> + *
> + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <stdlib.h>
> +
> +#include "cpu.h"
> +#include "exec/exec-all.h"
> +#include "helper.h"
> +
> +union AES_STATE {
> +    uint8_t    bytes[16];
> +    uint64_t   l[2];
> +};
> +
> +static void add_sub_shift(union AES_STATE *st, union AES_STATE *rk, int inv);
> +static void mix_columns(union AES_STATE *out, union AES_STATE *in);
> +static void inv_mix_columns_pre(union AES_STATE *out);
> +
> +void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)

"e" is rather cryptic as a parameter name...

> +{
> +    union AES_STATE sm = { .l = {
> +        float64_val(env->vfp.regs[rm]),
> +        float64_val(env->vfp.regs[rm + 1])
> +    } };
> +    union AES_STATE sd = { .l = {
> +        float64_val(env->vfp.regs[rd]),
> +        float64_val(env->vfp.regs[rd + 1])
> +    } };
> +
> +    add_sub_shift(&sd, &sm, e);
> +
> +    env->vfp.regs[rd] = make_float64(sd.l[0]);
> +    env->vfp.regs[rd + 1] = make_float64(sd.l[1]);
> +}
> +
> +void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)

Line over 80 characters (scripts/checkpatch.pl will catch this sort of nit).
"e" is still cryptic, and even less applicable here :-)

> +{
> +    union AES_STATE sd, sm = { .l = {
> +        float64_val(env->vfp.regs[rm]),
> +        float64_val(env->vfp.regs[rm + 1])
> +    } };
> +
> +    if (e) {
> +        inv_mix_columns_pre(&sm);
> +    }
> +    mix_columns(&sd, &sm);
> +
> +    env->vfp.regs[rd] = make_float64(sd.l[0]);
> +    env->vfp.regs[rd + 1] = make_float64(sd.l[1]);
> +}
> +
> +#define gf8_mul_x(a) \
> +    (((a) << 1) ^ (((a) & 0x80) ? 0x1b : 0))
> +
> +static void mix_columns(union AES_STATE *out, union AES_STATE *in)
> +{
> +    int i;
> +
> +    for (i = 0; i < 16; i++) {
> +        out->bytes[i] =
> +            gf8_mul_x(in->bytes[i]) ^
> +            gf8_mul_x(in->bytes[((i + 1) % 4) | (i & ~3)]) ^
> +                in->bytes[((i + 1) % 4) | (i & ~3)] ^
> +                in->bytes[((i + 2) % 4) | (i & ~3)] ^
> +                in->bytes[((i + 3) % 4) | (i & ~3)];
> +    }
> +}
> +
> +#define gf8_mul_x2(a) \
> +    (((a) << 2) ^ (((a) & 0x80) ? 0x36 : 0) ^ (((a) & 0x40) ? 0x1b : 0))
> +
> +static void inv_mix_columns_pre(union AES_STATE *out)
> +{
> +    union AES_STATE in = *out;
> +    int i;
> +
> +    for (i = 0; i < 16; i++) {
> +        out->bytes[i] = gf8_mul_x2(in.bytes[i]) ^ in.bytes[i] ^
> +                gf8_mul_x2(in.bytes[i ^ 2]);
> +    }
> +}
> +
> +static void add_sub_shift(union AES_STATE *st, union AES_STATE *rk, int inv)
> +{
> +    static uint8_t const sbox[][256] = { {
> +        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
> +        0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
> +        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
> +        0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
> +        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
> +        0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
> +        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
> +        0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
> +        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
> +        0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
> +        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
> +        0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
> +        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
> +        0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
> +        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
> +        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
> +        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
> +        0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
> +        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
> +        0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
> +        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
> +        0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
> +        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
> +        0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
> +        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
> +        0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
> +        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
> +        0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
> +        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
> +        0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
> +        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
> +        0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
> +    }, {
> +        0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
> +        0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
> +        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
> +        0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
> +        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
> +        0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
> +        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
> +        0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
> +        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
> +        0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
> +        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
> +        0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
> +        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
> +        0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
> +        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
> +        0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
> +        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
> +        0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
> +        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
> +        0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
> +        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
> +        0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
> +        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
> +        0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
> +        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
> +        0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
> +        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
> +        0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
> +        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
> +        0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
> +        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
> +        0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
> +    } };
> +    static uint8_t const permute[][16] = {
> +        { 0,  5, 10, 15, 4, 9, 14,  3, 8, 13, 2,  7, 12, 1, 6, 11 },
> +        { 0, 13, 10,  7, 4, 1, 14, 11, 8,  5, 2, 15, 12, 9, 6,  3 },
> +    };
> +    int i;
> +
> +    rk->l[0] ^= st->l[0];
> +    rk->l[1] ^= st->l[1];
> +
> +    for (i = 0; i < 16; i++) {
> +        st->bytes[i] = sbox[inv][rk->bytes[permute[inv][i]]];
> +    }
> +}

I'm going to have to take the correctness of the crypto magic
on trust :-)

(I should be able to do a cross-test against a reference
implementation later this week I think.)

> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index 63ae13a..a42f550 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -458,4 +458,7 @@ DEF_HELPER_3(neon_qzip8, void, env, i32, i32)
>  DEF_HELPER_3(neon_qzip16, void, env, i32, i32)
>  DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
>
> +DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
> +DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
> +
>  #include "exec/def-helper.h"
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 9da4ea1..c147491 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4346,6 +4346,8 @@ static const uint8_t neon_3r_sizes[] = {
>  #define NEON_2RM_VREV16 2
>  #define NEON_2RM_VPADDL 4
>  #define NEON_2RM_VPADDL_U 5
> +#define NEON_2RM_AESE 6 /* Includes AESD */
> +#define NEON_2RM_AESMC 7 /* Includes AESIMC */
>  #define NEON_2RM_VCLS 8
>  #define NEON_2RM_VCLZ 9
>  #define NEON_2RM_VCNT 10
> @@ -4403,6 +4405,8 @@ static const uint8_t neon_2rm_sizes[] = {
>      [NEON_2RM_VREV16] = 0x1,
>      [NEON_2RM_VPADDL] = 0x7,
>      [NEON_2RM_VPADDL_U] = 0x7,
> +    [NEON_2RM_AESE] = 0x1,
> +    [NEON_2RM_AESMC] = 0x1,
>      [NEON_2RM_VCLS] = 0x7,
>      [NEON_2RM_VCLZ] = 0x7,
>      [NEON_2RM_VCNT] = 0x1,
> @@ -5925,6 +5929,20 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
>                      tcg_temp_free_i32(tmp2);
>                      tcg_temp_free_i32(tmp3);
>                      break;
> +                case NEON_2RM_AESE: case NEON_2RM_AESMC:
> +                    if (!arm_feature(env, ARM_FEATURE_V8_AES)) {

This needs an "|| ((rm | rd) & 1)"  clause adding, I think
(pseudocode says that UNDEFs).

> +                        return 1;
> +                    }
> +                    tmp = tcg_const_i32(rd);
> +                    tmp2 = tcg_const_i32(rm);
> +                    tmp3 = tcg_const_i32((insn >> 6) & 1);

tmp3 = tcg_const_i32(extract32(insn, 6, 1));
(extract32() is fairly new so a lot of existing code does by-hand
bit manipulation, but I'm trying to encourage use of it in new code.)

We could also use a comment to save people wading through four
different parts of the ARM ARM:
 /* Bit 6 is the lowest opcode bit; it distinguishes AESE from AESD
  * and AESIMC from AESMC
  */

> +
> +                    if (op == NEON_2RM_AESE) {
> +                        gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
> +                    } else {
> +                        gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
> +                    }

You need
   tcg_temp_free_i32(tmp);
   tcg_temp_free_i32(tmp2);
   tcg_temp_free_i32(tmp3);

here, or we will leak TCG temps.

> +                    break;
>                  default:
>                  elementwise:
>                      for (pass = 0; pass < (q ? 4 : 2); pass++) {
> --
> 1.8.3.2
>

thanks
-- PMM
Ard Biesheuvel - Dec. 4, 2013, 7:04 a.m.
On 2 December 2013 17:06, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 6 November 2013 14:21, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>> This adds support for the AESE/AESD/AESMC/AESIMC instructions that
>> are available on some v8 implementations of Aarch32.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>
> Hi; thanks for this patch. I have a few minor review comments,
> but it looks pretty good.
>

Thanks.

> (Do you have any plans to do the remaining instructions
> (SHA1, SHA256, VMULL)? Not a requirement, but it would be
> nice to know for planning purposes :-))
>

Unlikely for CRC and poly64 multiply, as they have no use in crypto.
The SHA* instructions perhaps, but this is not planned, and even the
OpenSSL work itself that I am doing is put on hold at the moment so
for planning purposes it is better to assume not. I would be happy to
help out with reviewing, though, if someone else picks this up in the
meantime.

>> ---
>>  target-arm/Makefile.objs   |   1 +
>>  target-arm/cpu.c           |   1 +
>>  target-arm/cpu.h           |   1 +
>>  target-arm/crypto_helper.c | 172 +++++++++++++++++++++++++++++++++++++++++++++
>>  target-arm/helper.h        |   3 +
>>  target-arm/translate.c     |  18 +++++
>>  6 files changed, 196 insertions(+)
>>  create mode 100644 target-arm/crypto_helper.c
>>
>> diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
>> index 2d9f77f..6840f65 100644
>> --- a/target-arm/Makefile.objs
>> +++ b/target-arm/Makefile.objs
>> @@ -5,3 +5,4 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
>>  obj-y += translate.o op_helper.o helper.o cpu.o
>>  obj-y += neon_helper.o iwmmxt_helper.o
>>  obj-y += gdbstub.o
>> +obj-y += crypto_helper.o
>> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
>> index f0ed62f..4ba3246 100644
>> --- a/target-arm/cpu.c
>> +++ b/target-arm/cpu.c
>> @@ -169,6 +169,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>>          set_feature(env, ARM_FEATURE_V7);
>>          set_feature(env, ARM_FEATURE_ARM_DIV);
>>          set_feature(env, ARM_FEATURE_LPAE);
>> +        set_feature(env, ARM_FEATURE_V8_AES);
>>      }
>>      if (arm_feature(env, ARM_FEATURE_V7)) {
>>          set_feature(env, ARM_FEATURE_VAPA);
>> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
>> index a44d56f..8e4593b 100644
>> --- a/target-arm/cpu.h
>> +++ b/target-arm/cpu.h
>> @@ -395,6 +395,7 @@ enum arm_features {
>>      ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
>>      ARM_FEATURE_V8,
>>      ARM_FEATURE_TRUSTZONE, /* TrustZone Security Extensions. */
>> +    ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
>
> It looks like you've maybe based this on the wrong tree? I get
> conflicts trying to apply it to qemu upstream master because
> master doesn't have ARM_FEATURE_TRUSTZONE.
>

OK, I will rebase on top of upstream master, I probably used a tree
from some Linaro repo.

>>  };
>>
>>  static inline int arm_feature(CPUARMState *env, int feature)
>> diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
>> new file mode 100644
>> index 0000000..f4b633f
>> --- /dev/null
>> +++ b/target-arm/crypto_helper.c
>> @@ -0,0 +1,172 @@
>> +/*
>> + * crypto_helper.c - emulate v8 Crypto Extensions instructions
>> + *
>> + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2 of the License, or (at your option) any later version.
>> + */
>> +
>> +#include <stdlib.h>
>> +
>> +#include "cpu.h"
>> +#include "exec/exec-all.h"
>> +#include "helper.h"
>> +
>> +union AES_STATE {
>> +    uint8_t    bytes[16];
>> +    uint64_t   l[2];
>> +};
>> +
>> +static void add_sub_shift(union AES_STATE *st, union AES_STATE *rk, int inv);
>> +static void mix_columns(union AES_STATE *out, union AES_STATE *in);
>> +static void inv_mix_columns_pre(union AES_STATE *out);
>> +
>> +void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)
>
> "e" is rather cryptic as a parameter name...
>

OK, will change that

>> +{
>> +    union AES_STATE sm = { .l = {
>> +        float64_val(env->vfp.regs[rm]),
>> +        float64_val(env->vfp.regs[rm + 1])
>> +    } };
>> +    union AES_STATE sd = { .l = {
>> +        float64_val(env->vfp.regs[rd]),
>> +        float64_val(env->vfp.regs[rd + 1])
>> +    } };
>> +
>> +    add_sub_shift(&sd, &sm, e);
>> +
>> +    env->vfp.regs[rd] = make_float64(sd.l[0]);
>> +    env->vfp.regs[rd + 1] = make_float64(sd.l[1]);
>> +}
>> +
>> +void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)
>
> Line over 80 characters (scripts/checkpatch.pl will catch this sort of nit).
> "e" is still cryptic, and even less applicable here :-)
>

OK.

>> +{
>> +    union AES_STATE sd, sm = { .l = {
>> +        float64_val(env->vfp.regs[rm]),
>> +        float64_val(env->vfp.regs[rm + 1])
>> +    } };
[...]
>> +        st->bytes[i] = sbox[inv][rk->bytes[permute[inv][i]]];
>> +    }
>> +}
>
> I'm going to have to take the correctness of the crypto magic
> on trust :-)
>
> (I should be able to do a cross-test against a reference
> implementation later this week I think.)
>

Please do. I know Steve Capper has the Fast Model plugin, and he has
tested my kernel version before, but it would be good to check
explicitly against the Aarch32 versions of the instructions.

>> diff --git a/target-arm/helper.h b/target-arm/helper.h
>> index 63ae13a..a42f550 100644
>> --- a/target-arm/helper.h
>> +++ b/target-arm/helper.h
>> @@ -458,4 +458,7 @@ DEF_HELPER_3(neon_qzip8, void, env, i32, i32)
>>  DEF_HELPER_3(neon_qzip16, void, env, i32, i32)
>>  DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
>>
>> +DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
>> +DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
>> +
>>  #include "exec/def-helper.h"
>> diff --git a/target-arm/translate.c b/target-arm/translate.c
>> index 9da4ea1..c147491 100644
>> --- a/target-arm/translate.c
>> +++ b/target-arm/translate.c
>> @@ -4346,6 +4346,8 @@ static const uint8_t neon_3r_sizes[] = {
>>  #define NEON_2RM_VREV16 2
>>  #define NEON_2RM_VPADDL 4
>>  #define NEON_2RM_VPADDL_U 5
>> +#define NEON_2RM_AESE 6 /* Includes AESD */
>> +#define NEON_2RM_AESMC 7 /* Includes AESIMC */
>>  #define NEON_2RM_VCLS 8
>>  #define NEON_2RM_VCLZ 9
>>  #define NEON_2RM_VCNT 10
>> @@ -4403,6 +4405,8 @@ static const uint8_t neon_2rm_sizes[] = {
>>      [NEON_2RM_VREV16] = 0x1,
>>      [NEON_2RM_VPADDL] = 0x7,
>>      [NEON_2RM_VPADDL_U] = 0x7,
>> +    [NEON_2RM_AESE] = 0x1,
>> +    [NEON_2RM_AESMC] = 0x1,
>>      [NEON_2RM_VCLS] = 0x7,
>>      [NEON_2RM_VCLZ] = 0x7,
>>      [NEON_2RM_VCNT] = 0x1,
>> @@ -5925,6 +5929,20 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
>>                      tcg_temp_free_i32(tmp2);
>>                      tcg_temp_free_i32(tmp3);
>>                      break;
>> +                case NEON_2RM_AESE: case NEON_2RM_AESMC:
>> +                    if (!arm_feature(env, ARM_FEATURE_V8_AES)) {
>
> This needs an "|| ((rm | rd) & 1)"  clause adding, I think
> (pseudocode says that UNDEFs).
>
>> +                        return 1;
>> +                    }
>> +                    tmp = tcg_const_i32(rd);
>> +                    tmp2 = tcg_const_i32(rm);
>> +                    tmp3 = tcg_const_i32((insn >> 6) & 1);
>
> tmp3 = tcg_const_i32(extract32(insn, 6, 1));
> (extract32() is fairly new so a lot of existing code does by-hand
> bit manipulation, but I'm trying to encourage use of it in new code.)
>
> We could also use a comment to save people wading through four
> different parts of the ARM ARM:
>  /* Bit 6 is the lowest opcode bit; it distinguishes AESE from AESD
>   * and AESIMC from AESMC
>   */
>

OK.

>> +
>> +                    if (op == NEON_2RM_AESE) {
>> +                        gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
>> +                    } else {
>> +                        gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
>> +                    }
>
> You need
>    tcg_temp_free_i32(tmp);
>    tcg_temp_free_i32(tmp2);
>    tcg_temp_free_i32(tmp3);
>
> here, or we will leak TCG temps.
>

OK

>> +                    break;
>>                  default:
>>                  elementwise:
>>                      for (pass = 0; pass < (q ? 4 : 2); pass++) {
>> --
>> 1.8.3.2
>>
>

I will post an updated patch by the end of the week.

Regards,
Ard.

Patch

diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index 2d9f77f..6840f65 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -5,3 +5,4 @@  obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
 obj-y += gdbstub.o
+obj-y += crypto_helper.o
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index f0ed62f..4ba3246 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -169,6 +169,7 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_V7);
         set_feature(env, ARM_FEATURE_ARM_DIV);
         set_feature(env, ARM_FEATURE_LPAE);
+        set_feature(env, ARM_FEATURE_V8_AES);
     }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         set_feature(env, ARM_FEATURE_VAPA);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index a44d56f..8e4593b 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -395,6 +395,7 @@  enum arm_features {
     ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
     ARM_FEATURE_V8,
     ARM_FEATURE_TRUSTZONE, /* TrustZone Security Extensions. */
+    ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
new file mode 100644
index 0000000..f4b633f
--- /dev/null
+++ b/target-arm/crypto_helper.c
@@ -0,0 +1,232 @@
+/*
+ * crypto_helper.c - emulate v8 Crypto Extensions instructions
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdlib.h>
+
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "helper.h"
+
+/*
+ * A 128-bit AES state or round key.  bytes[i] holds byte i of the guest
+ * vector, where byte 0 is the least significant byte of the low D register.
+ * Register values are converted with aes_state_load()/aes_state_store()
+ * instead of being aliased through l[], because the l[]/bytes[] overlay
+ * only yields that byte order on little-endian hosts.
+ */
+union AES_STATE {
+    uint8_t    bytes[16];
+    uint64_t   l[2];
+};
+
+static void add_sub_shift(union AES_STATE *st, union AES_STATE *rk, int inv);
+static void mix_columns(union AES_STATE *out, const union AES_STATE *in);
+static void inv_mix_columns_pre(union AES_STATE *out);
+
+/* Unpack D register values lo (bytes 0-7) and hi (bytes 8-15) into *st. */
+static void aes_state_load(union AES_STATE *st, uint64_t lo, uint64_t hi)
+{
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        st->bytes[i] = lo >> (8 * i);
+        st->bytes[i + 8] = hi >> (8 * i);
+    }
+}
+
+/* Pack bytes 0-7 into *lo and bytes 8-15 into *hi, low byte first. */
+static void aes_state_store(const union AES_STATE *st,
+                            uint64_t *lo, uint64_t *hi)
+{
+    uint64_t l = 0, h = 0;
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        l |= (uint64_t)st->bytes[i] << (8 * i);
+        h |= (uint64_t)st->bytes[i + 8] << (8 * i);
+    }
+    *lo = l;
+    *hi = h;
+}
+
+/*
+ * AESE (e == 0) / AESD (e != 0): XOR the state in D registers rd, rd + 1
+ * with the round key in rm, rm + 1, apply the combined ShiftRows/SubBytes
+ * step (or its inverse) and write the result back to rd, rd + 1.
+ */
+void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)
+{
+    union AES_STATE sm, sd;
+    uint64_t lo, hi;
+
+    aes_state_load(&sm, float64_val(env->vfp.regs[rm]),
+                   float64_val(env->vfp.regs[rm + 1]));
+    aes_state_load(&sd, float64_val(env->vfp.regs[rd]),
+                   float64_val(env->vfp.regs[rd + 1]));
+
+    /* Normalise e so it can only select one of the two lookup tables. */
+    add_sub_shift(&sd, &sm, e != 0);
+
+    aes_state_store(&sd, &lo, &hi);
+    env->vfp.regs[rd] = make_float64(lo);
+    env->vfp.regs[rd + 1] = make_float64(hi);
+}
+
+/*
+ * AESMC (e == 0) / AESIMC (e != 0): apply MixColumns, or InvMixColumns, to
+ * the state in D registers rm, rm + 1 and write the result to rd, rd + 1.
+ */
+void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, uint32_t e)
+{
+    union AES_STATE sd, sm;
+    uint64_t lo, hi;
+
+    aes_state_load(&sm, float64_val(env->vfp.regs[rm]),
+                   float64_val(env->vfp.regs[rm + 1]));
+
+    if (e) {
+        /* InvMixColumns is this pre-multiplication followed by MixColumns */
+        inv_mix_columns_pre(&sm);
+    }
+    mix_columns(&sd, &sm);
+
+    aes_state_store(&sd, &lo, &hi);
+    env->vfp.regs[rd] = make_float64(lo);
+    env->vfp.regs[rd + 1] = make_float64(hi);
+}
+
+/* Multiply by x in GF(2^8) modulo the AES polynomial; bits above 7 are
+ * dropped when the result is stored in a uint8_t.
+ */
+#define gf8_mul_x(a) \
+    (((a) << 1) ^ (((a) & 0x80) ? 0x1b : 0))
+
+/* MixColumns: out[i] = 2 * in[i] ^ 3 * in[i + 1] ^ in[i + 2] ^ in[i + 3],
+ * with the byte indices wrapping within each 4-byte column.
+ */
+static void mix_columns(union AES_STATE *out, const union AES_STATE *in)
+{
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        out->bytes[i] =
+            gf8_mul_x(in->bytes[i]) ^
+            gf8_mul_x(in->bytes[((i + 1) % 4) | (i & ~3)]) ^
+                in->bytes[((i + 1) % 4) | (i & ~3)] ^
+                in->bytes[((i + 2) % 4) | (i & ~3)] ^
+                in->bytes[((i + 3) % 4) | (i & ~3)];
+    }
+}
+
+/* Multiply by x^2 (i.e. 4) in GF(2^8), with the same truncation caveat. */
+#define gf8_mul_x2(a) \
+    (((a) << 2) ^ (((a) & 0x80) ? 0x36 : 0) ^ (((a) & 0x40) ? 0x1b : 0))
+
+/* Replace each byte with 5 * in[i] ^ 4 * in[i ^ 2], in place. */
+static void inv_mix_columns_pre(union AES_STATE *out)
+{
+    union AES_STATE in = *out;
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        out->bytes[i] = gf8_mul_x2(in.bytes[i]) ^ in.bytes[i] ^
+                gf8_mul_x2(in.bytes[i ^ 2]);
+    }
+}
+
+/*
+ * XOR the state *st into the round key *rk (clobbering *rk), then set each
+ * state byte to sbox[inv][...] of the permuted XOR result.  inv must be 0
+ * (forward tables) or 1 (inverse tables).
+ */
+static void add_sub_shift(union AES_STATE *st, union AES_STATE *rk, int inv)
+{
+    static uint8_t const sbox[][256] = { {
+        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+        0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+        0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+        0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+        0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+        0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+        0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+        0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+        0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+        0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+        0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+        0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+        0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+        0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+        0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+        0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+    }, {
+        0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+        0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+        0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+        0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+        0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+        0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+        0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+        0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+        0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+        0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+        0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+        0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+        0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+        0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+        0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+        0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+        0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+    } };
+    static uint8_t const permute[][16] = {
+        { 0,  5, 10, 15, 4, 9, 14,  3, 8, 13, 2,  7, 12, 1, 6, 11 },
+        { 0, 13, 10,  7, 4, 1, 14, 11, 8,  5, 2, 15, 12, 9, 6,  3 },
+    };
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        rk->bytes[i] ^= st->bytes[i];
+    }
+
+    for (i = 0; i < 16; i++) {
+        st->bytes[i] = sbox[inv][rk->bytes[permute[inv][i]]];
+    }
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 63ae13a..a42f550 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -458,4 +458,7 @@  DEF_HELPER_3(neon_qzip8, void, env, i32, i32)
 DEF_HELPER_3(neon_qzip16, void, env, i32, i32)
 DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
 
+DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
+DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
+
 #include "exec/def-helper.h"
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9da4ea1..c147491 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4346,6 +4346,8 @@  static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VREV16 2
 #define NEON_2RM_VPADDL 4
 #define NEON_2RM_VPADDL_U 5
+#define NEON_2RM_AESE 6 /* Includes AESD */
+#define NEON_2RM_AESMC 7 /* Includes AESIMC */
 #define NEON_2RM_VCLS 8
 #define NEON_2RM_VCLZ 9
 #define NEON_2RM_VCNT 10
@@ -4403,6 +4405,8 @@  static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VREV16] = 0x1,
     [NEON_2RM_VPADDL] = 0x7,
     [NEON_2RM_VPADDL_U] = 0x7,
+    [NEON_2RM_AESE] = 0x1,
+    [NEON_2RM_AESMC] = 0x1,
     [NEON_2RM_VCLS] = 0x7,
     [NEON_2RM_VCLZ] = 0x7,
     [NEON_2RM_VCNT] = 0x1,
@@ -5925,6 +5929,32 @@  static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                     tcg_temp_free_i32(tmp2);
                     tcg_temp_free_i32(tmp3);
                     break;
+                case NEON_2RM_AESE: case NEON_2RM_AESMC:
+                    /* The helpers treat rd and rm as Q registers (D register
+                     * pairs n, n + 1); odd D register numbers UNDEF.
+                     */
+                    if (!arm_feature(env, ARM_FEATURE_V8_AES)
+                        || ((rm | rd) & 1)) {
+                        return 1;
+                    }
+                    tmp = tcg_const_i32(rd);
+                    tmp2 = tcg_const_i32(rm);
+
+                    /* Bit 6 is the lowest opcode bit; it distinguishes AESE
+                     * from AESD and AESMC from AESIMC.
+                     */
+                    tmp3 = tcg_const_i32(extract32(insn, 6, 1));
+
+                    if (op == NEON_2RM_AESE) {
+                        gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
+                    } else {
+                        gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
+                    }
+                    /* Free the constant temps so they are not leaked. */
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp3);
+                    break;
                 default:
                 elementwise:
                     for (pass = 0; pass < (q ? 4 : 2); pass++) {