diff mbox

tcg/aarch64: refactoring, remove inline magic insn numbers

Message ID 1391797050-20847-1-git-send-email-claudio.fontana@huawei.com
State New
Headers show

Commit Message

Claudio Fontana Feb. 7, 2014, 6:17 p.m. UTC
From: Claudio Fontana <claudio.fontana@huawei.com>

this is general polishing for tcg/aarch64, removing the inline magic
insn numbers, and putting up enums for instruction classes and subclasses
in their stead.

Signed-off-by: Claudio Fontana <claudio.fontana@huawei.com>
Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
---
 tcg/aarch64/tcg-target.c | 528 ++++++++++++++++++++++++++++-------------------
 1 file changed, 313 insertions(+), 215 deletions(-)

Tested on Foundation v8 with latest mainline kernel for the host,
booting qemu guest targets 32bit arm, x86-64, 32bit sparc.

Comments

Claudio Fontana Feb. 28, 2014, 8:33 a.m. UTC | #1
ping?

Richard, do you prefer to spin your polishing from last year instead?

We can start with the pure refactoring patch as you suggested, and in
this case, you could look here for some things I'd look for,
in particular that we move gradually, that we group instructions that
are handled in a similar way,
and that we have function names that actually express what is being
accomplished instead of how, for the purpose of readability.

We can later add patches for the additional optional operations and
the performance improvements and optimizations.

Thanks,

Claudio

On 7 February 2014 19:17, Claudio Fontana <hw.claudio@gmail.com> wrote:
> From: Claudio Fontana <claudio.fontana@huawei.com>
>
> this is general polishing for tcg/aarch64, removing the inline magic
> insn numbers, and putting up enums for instruction classes and subclasses
> in their stead.
>
> Signed-off-by: Claudio Fontana <claudio.fontana@huawei.com>
> Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
> ---
>  tcg/aarch64/tcg-target.c | 528 ++++++++++++++++++++++++++++-------------------
>  1 file changed, 313 insertions(+), 215 deletions(-)
>
> Tested on Foundation v8 with latest mainline kernel for the host,
> booting qemu guest targets 32bit arm, x86-64, 32bit sparc.
>
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index 04d7ae3..580f340 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -186,39 +186,121 @@ static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
>      [TCG_COND_LEU] = COND_LS,
>  };
>
> -/* opcodes for LDR / STR instructions with base + simm9 addressing */
> -enum aarch64_ldst_op_data { /* size of the data moved */
> -    LDST_8 = 0x38,
> -    LDST_16 = 0x78,
> -    LDST_32 = 0xb8,
> -    LDST_64 = 0xf8,
> +/* Instructions and instruction classes.
> + * Documentation References are relative to ARM DDI 0487A.a
> + * (C3: A64 Instruction Set Encoding).
> + * Instructions are either directly encoded in this list,
> + * missing only the parameters, or are orred with instruction
> + * subclasses described in enums below.
> + */
> +enum a64_insn {
> +    INSN_B_COND    = 0x54000000, /* 3.2.2 - Conditional Branch imm */
> +    INSN_B         = 0x14000000, /* 3.2.6 - Unconditional branch imm (B) */
> +    INSN_BL        = 0x94000000, /* 3.2.6 - (BL) */
> +    INSN_BR        = 0xd61f0000, /* 3.2.7 - Unconditional branch reg (BR) */
> +    INSN_BLR       = 0xd63f0000, /* 3.2.7 - (BLR) */
> +    INSN_RET       = 0xd65f0000, /* 3.2.7 - (RET) */
> +    INSN_LDST      = 0x38000000, /* 3.3 - Load and Stores (9/12/register) */
> +    INSN_LDSTP     = 0x28000000, /* 3.3.14, 3.3.15, 3.3.16 */
> +    INSN_IMM       = 0x10000000, /* 3.4.1, 3.4.4  - add/sub and logical */
> +    INSN_BF        = 0x13000000, /* 3.4.2, 3.4.3 - bitfield, extract */
> +    INSN_MOVI      = 0x12800000, /* 3.4.5 - Move Wide (Immediate) */
> +    INSN_ARITH     = 0x0a000000, /* 3.5.2, 3.5.10 - add/sub and logical */
> +    INSN_CS        = 0x1a800000, /* 3.5.6 - Conditional select */
> +    INSN_REV       = 0x5ac00800, /* 3.5.7 - Data proc (1 source) (REV/REV32) */
> +    INSN_REV16     = 0x5ac00400, /* 3.5.7 - (REV16) */
> +    INSN_SHIFT     = 0x1ac02000, /* 3.5.8 - Data proc (2 source) */
> +    INSN_MUL       = 0x1b000000, /* 3.5.9 - Data proc (3 source) */
> +    INSN_NOP       = 0xd503201f, /* 3.2.4 - HINT / 5.6.139 NOP */
> +};
> +
> +/* enums for LDR / STR instructions with base + simm9 addressing */
> +enum a64_ldst_data { /* corresponds to s_bits */
> +    LDST_8  = 0x0 << 30,
> +    LDST_16 = 0x1 << 30,
> +    LDST_32 = 0x2 << 30,
> +    LDST_64 = 0x3 << 30
> +};
> +enum a64_ldst_type {    /* type of operation */
> +    LDST_ST     = 0x0 << 20,     /* store */
> +    LDST_LD     = 0x4 << 20,     /* load */
> +    LDST_LD_S_X = 0x8 << 20, /* load and sign-extend into Xt */
> +    LDST_LD_S_W = 0xc << 20, /* load and sign-extend into Wt */
> +};
> +enum a64_ldst_addr {    /* addressing mode */
> +    LDST_9  = 0,
> +    LDST_12 = (1 << 24),
> +    LDST_R  = 0x00206800
>  };
> -enum aarch64_ldst_op_type { /* type of operation */
> -    LDST_ST = 0x0,    /* store */
> -    LDST_LD = 0x4,    /* load */
> -    LDST_LD_S_X = 0x8,  /* load and sign-extend into Xt */
> -    LDST_LD_S_W = 0xc,  /* load and sign-extend into Wt */
> +enum a64_ldstp_type {
> +    LDSTP_ST = 0x0 << 22,
> +    LDSTP_LD = 0x1 << 22
> +};
> +enum a64_ldstp_addr {   /* addressing mode */
> +    LDSTP_POST  = 1 << 23, /* post-index writeback */
> +    LDSTP_NOIDX = 2 << 23, /* no writeback, normal imm7 signed offset */
> +    LDSTP_PRE   = 3 << 23, /* pre-index writeback */
> +};
> +
> +enum a64_insn_arith {
> +    ARITH_ADD  = 0x01 << 24,
> +    ARITH_ADDS = 0x21 << 24,
> +    ARITH_SUB  = 0x41 << 24,
> +    ARITH_SUBS = 0x61 << 24,
> +
> +    ARITH_AND  = 0x00 << 24,
> +    ARITH_OR   = 0x20 << 24,
> +    ARITH_XOR  = 0x40 << 24,
> +    ARITH_ANDS = 0x60 << 24,
>  };
>
> -enum aarch64_arith_opc {
> -    ARITH_AND = 0x0a,
> -    ARITH_ADD = 0x0b,
> -    ARITH_OR = 0x2a,
> -    ARITH_ADDS = 0x2b,
> -    ARITH_XOR = 0x4a,
> -    ARITH_SUB = 0x4b,
> -    ARITH_ANDS = 0x6a,
> -    ARITH_SUBS = 0x6b,
> +enum a64_insn_movi { /* C3.4.5 decode field 'opc' */
> +    MOVZ = 0x02 << 29,
> +    MOVK = 0x03 << 29,
>  };
>
> -enum aarch64_srr_opc {
> -    SRR_SHL = 0x0,
> -    SRR_SHR = 0x4,
> -    SRR_SAR = 0x8,
> -    SRR_ROR = 0xc
> +enum a64_insn_imm_arith { /* 3.4.1 */
> +    IMM_ADD  = 0x01 << 24,
> +    IMM_ADDS = 0x21 << 24,
> +    IMM_SUB  = 0x41 << 24,
> +    IMM_SUBS = 0x61 << 24
>  };
>
> -static inline enum aarch64_ldst_op_data
> +enum a64_insn_imm_log { /* 3.4.4 */
> +    IMM_AND  = 0x02 << 24,
> +    IMM_OR   = 0x22 << 24,
> +    IMM_XOR  = 0x42 << 24,
> +    IMM_ANDS = 0x62 << 24,
> +};
> +
> +enum a64_insn_mul { /* 3.5.9 */
> +    MUL_MADD = 0x0000,
> +    MUL_MSUB = 0x8000,
> +};
> +
> +enum a64_insn_shift {
> +    SHIFT_SHL = 0x0 << 10,
> +    SHIFT_SHR = 0x1 << 10,
> +    SHIFT_SAR = 0x2 << 10,
> +    SHIFT_ROR = 0x3 << 10
> +};
> +
> +enum a64_insn_bf {
> +    BF_SBFM = 0 << 29,
> +    BF_BFM  = 1 << 29,
> +    BF_UBFM = 2 << 29,
> +
> +    BF_EXTR = 1 << 23,
> +};
> +
> +enum a64_insn_cs { /* 3.5.6 */
> +    CS_CSEL  = 0,
> +    CS_CSINC = 1 << 10,
> +    CS_CSINV = 1 << 30,
> +    CS_CSNEG = 1 << 30 | 1 << 10
> +};
> +
> +static inline enum a64_ldst_data
>  aarch64_ldst_get_data(TCGOpcode tcg_op)
>  {
>      switch (tcg_op) {
> @@ -254,7 +336,7 @@ aarch64_ldst_get_data(TCGOpcode tcg_op)
>      }
>  }
>
> -static inline enum aarch64_ldst_op_type
> +static inline enum a64_ldst_type
>  aarch64_ldst_get_type(TCGOpcode tcg_op)
>  {
>      switch (tcg_op) {
> @@ -297,8 +379,8 @@ static inline uint32_t tcg_in32(TCGContext *s)
>  }
>
>  static inline void tcg_out_ldst_9(TCGContext *s,
> -                                  enum aarch64_ldst_op_data op_data,
> -                                  enum aarch64_ldst_op_type op_type,
> +                                  enum a64_ldst_data op_data,
> +                                  enum a64_ldst_type op_type,
>                                    TCGReg rd, TCGReg rn, tcg_target_long offset)
>  {
>      /* use LDUR with BASE register with 9bit signed unscaled offset */
> @@ -306,42 +388,123 @@ static inline void tcg_out_ldst_9(TCGContext *s,
>
>      if (offset < 0) {
>          off = (256 + offset);
> -        mod = 0x1;
> +        mod = 0x1 << 20;
>      } else {
>          off = offset;
> -        mod = 0x0;
> +        mod = 0x0 << 20;
>      }
>
> -    mod |= op_type;
> -    tcg_out32(s, op_data << 24 | mod << 20 | off << 12 | rn << 5 | rd);
> +    tcg_out32(s, INSN_LDST | LDST_9 | op_data | op_type | mod | off << 12
> +              | rn << 5 | rd);
>  }
>
>  /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
>  static inline void tcg_out_ldst_12(TCGContext *s,
> -                                   enum aarch64_ldst_op_data op_data,
> -                                   enum aarch64_ldst_op_type op_type,
> +                                   enum a64_ldst_data op_data,
> +                                   enum a64_ldst_type op_type,
>                                     TCGReg rd, TCGReg rn,
>                                     tcg_target_ulong scaled_uimm)
>  {
> -    tcg_out32(s, (op_data | 1) << 24
> -              | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
> +    tcg_out32(s, INSN_LDST | LDST_12 | op_data | op_type | scaled_uimm << 10
> +              | rn << 5 | rd);
> +}
> +
> +static inline void tcg_out_ldst_r(TCGContext *s,
> +                                  enum a64_ldst_data op_data,
> +                                  enum a64_ldst_type op_type,
> +                                  TCGReg rd, TCGReg base, TCGReg regoff)
> +{
> +    /* load from memory to register using base + 64bit register offset */
> +    tcg_out32(s, INSN_LDST | LDST_R | op_data | op_type | regoff << 16
> +              | base << 5 | rd);
> +}
> +
> +/* tcg_out_ldstp expects a scaled signed immediate offset */
> +static inline void tcg_out_ldstp(TCGContext *s, int ext,
> +                                 enum a64_ldstp_type op_type,
> +                                 enum a64_ldstp_addr idx,
> +                                 TCGReg rt, TCGReg rt2, TCGReg rn,
> +                                 int simm7)
> +{
> +    unsigned int insn = ext ? (1 << 31) : 0;
> +    simm7 &= 0x7f;
> +    insn |= INSN_LDSTP | op_type | idx;
> +    tcg_out32(s, insn | simm7 << 15 | rt2 << 10 | rn << 5 | rt);
> +}
> +
> +static inline void tcg_out_arith(TCGContext *s, enum a64_insn_arith opc,
> +                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
> +                                 int shift_imm)
> +{
> +    /* Using shifted register arithmetic operations */
> +    /* if extended register operation (64bit) just set bit 31. */
> +    unsigned int shift, insn = ext ? (1 << 31) : 0;
> +    insn |= INSN_ARITH | opc;
> +
> +    if (shift_imm == 0) {
> +        shift = 0;
> +    } else if (shift_imm > 0) {
> +        shift = shift_imm << 10 | 1 << 22;
> +    } else /* (shift_imm < 0) */ {
> +        shift = (-shift_imm) << 10;
> +    }
> +    tcg_out32(s, insn | rm << 16 | shift | rn << 5 | rd);
> +}
> +
> +static inline void tcg_out_imm_arith(TCGContext *s, enum a64_insn_imm_arith op,
> +                                     int ext, TCGReg rd, TCGReg rn,
> +                                     unsigned int aimm)
> +{
> +    /* add/sub immediate unsigned 12bit value (with LSL 0 or 12) */
> +    /* if extended register operation (64bit) just set bit 31. */
> +    unsigned int insn = ext ? (1 << 31) : 0;
> +    insn |= INSN_IMM | op;
> +
> +    if (aimm <= 0xfff) {
> +        aimm <<= 10;
> +    } else {
> +        /* we can only shift left by 12, on assert we cannot represent */
> +        assert(!(aimm & 0xfff));
> +        assert(aimm <= 0xfff000);
> +        insn |= 1 << 22; /* apply LSL 12 */
> +        aimm >>= 2;
> +    }
> +
> +    tcg_out32(s, insn | aimm | (rn << 5) | rd);
> +}
> +
> +static inline void tcg_out_addi(TCGContext *s, int ext,
> +                                TCGReg rd, TCGReg rn, unsigned int aimm)
> +{
> +    tcg_out_imm_arith(s, IMM_ADD, ext, rd, rn, aimm);
> +}
> +
> +static inline void tcg_out_subi(TCGContext *s, int ext,
> +                                TCGReg rd, TCGReg rn, unsigned int aimm)
> +{
> +    tcg_out_imm_arith(s, IMM_SUB, ext, rd, rn, aimm);
>  }
>
>  static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src)
>  {
> -    /* register to register move using MOV (shifted register with no shift) */
> -    /* using MOV 0x2a0003e0 | (shift).. */
> -    unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
> -    tcg_out32(s, base | src << 16 | rd);
> +    /* register to register move using ORR (shifted register with no shift) */
> +    tcg_out_arith(s, ARITH_OR, ext, rd, TCG_REG_XZR, src, 0);
> +}
> +
> +static inline void tcg_out_movr_sp(TCGContext *s, int ext,
> +                                   TCGReg rd, TCGReg rn)
> +{
> +    /* reg to reg move using immediate add, useful to move to/from SP */
> +    tcg_out_imm_arith(s, IMM_ADD, ext, rd, rn, 0);
>  }
>
>  static inline void tcg_out_movi_aux(TCGContext *s,
>                                      TCGReg rd, uint64_t value)
>  {
> -    uint32_t half, base, shift, movk = 0;
> +    uint32_t half, insn, shift;
>      /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
> -    /* using MOVZ 0x52800000 | extended reg.. */
> -    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
> +    insn = (value > 0xffffffff) ? 1 << 31 : 0;
> +    insn |= INSN_MOVI | MOVZ;
>      /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
>         first MOVZ with the half-word immediate skipping the zeros, with a shift
>         (LSL) equal to this number. Then morph all next instructions into MOVKs.
> @@ -351,8 +514,8 @@ static inline void tcg_out_movi_aux(TCGContext *s,
>      do {
>          shift = ctz64(value) & (63 & -16);
>          half = (value >> shift) & 0xffff;
> -        tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
> -        movk = 0x20000000; /* morph next MOVZs into MOVKs */
> +        tcg_out32(s, insn | shift << 17 | half << 5 | rd);
> +        insn |= MOVK; /* change MOVZs into MOVKs */
>          value &= ~(0xffffUL << shift);
>      } while (value);
>  }
> @@ -367,21 +530,9 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
>      }
>  }
>
> -static inline void tcg_out_ldst_r(TCGContext *s,
> -                                  enum aarch64_ldst_op_data op_data,
> -                                  enum aarch64_ldst_op_type op_type,
> -                                  TCGReg rd, TCGReg base, TCGReg regoff)
> -{
> -    /* load from memory to register using base + 64bit register offset */
> -    /* using f.e. STR Wt, [Xn, Xm] 0xb8600800|(regoff << 16)|(base << 5)|rd */
> -    /* the 0x6000 is for the "no extend field" */
> -    tcg_out32(s, 0x00206800
> -              | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
> -}
> -
>  /* solve the whole ldst problem */
> -static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
> -                                enum aarch64_ldst_op_type type,
> +static inline void tcg_out_ldst(TCGContext *s, enum a64_ldst_data data,
> +                                enum a64_ldst_type type,
>                                  TCGReg rd, TCGReg rn, tcg_target_long offset)
>  {
>      if (offset >= -256 && offset < 256) {
> @@ -392,7 +543,7 @@ static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
>      if (offset >= 256) {
>          /* if the offset is naturally aligned and in range,
>             then we can use the scaled uimm12 encoding */
> -        unsigned int s_bits = data >> 6;
> +        unsigned int s_bits = data >> 30;
>          if (!(offset & ((1 << s_bits) - 1))) {
>              tcg_target_ulong scaled_uimm = offset >> s_bits;
>              if (scaled_uimm <= 0xfff) {
> @@ -407,14 +558,6 @@ static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
>      tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
>  }
>
> -/* mov alias implemented with add immediate, useful to move to/from SP */
> -static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn)
> -{
> -    /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
> -    unsigned int base = ext ? 0x91000000 : 0x11000000;
> -    tcg_out32(s, base | rn << 5 | rd);
> -}
> -
>  static inline void tcg_out_mov(TCGContext *s,
>                                 TCGType type, TCGReg ret, TCGReg arg)
>  {
> @@ -437,62 +580,47 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
>                   arg, arg1, arg2);
>  }
>
> -static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
> -                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
> -                                 int shift_imm)
> +static inline void tcg_out_mul(TCGContext *s, enum a64_insn_mul op, int ext,
> +                               TCGReg rd, TCGReg rn, TCGReg ra, TCGReg rm)
>  {
> -    /* Using shifted register arithmetic operations */
> -    /* if extended register operation (64bit) just OR with 0x80 << 24 */
> -    unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24;
> -    if (shift_imm == 0) {
> -        shift = 0;
> -    } else if (shift_imm > 0) {
> -        shift = shift_imm << 10 | 1 << 22;
> -    } else /* (shift_imm < 0) */ {
> -        shift = (-shift_imm) << 10;
> -    }
> -    tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
> +    unsigned int insn = ext ? (1 << 31) : 0;
> +    insn |= INSN_MUL | op;
> +    tcg_out32(s, insn | rm << 16 | ra << 10 | rn << 5 | rd);
>  }
>
> -static inline void tcg_out_mul(TCGContext *s, int ext,
> -                               TCGReg rd, TCGReg rn, TCGReg rm)
> +static inline void tcg_out_shift(TCGContext *s, enum a64_insn_shift shift_type,
> +                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm)
>  {
> -    /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
> -    unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
> -    tcg_out32(s, base | rm << 16 | rn << 5 | rd);
> +    unsigned int insn = ext ? (1 << 31) : 0;
> +    insn |= INSN_SHIFT | shift_type;
> +    tcg_out32(s, insn | rm << 16 | rn << 5 | rd);
>  }
>
> -static inline void tcg_out_shiftrot_reg(TCGContext *s,
> -                                        enum aarch64_srr_opc opc, int ext,
> -                                        TCGReg rd, TCGReg rn, TCGReg rm)
> +static inline void tcg_out_bf(TCGContext *s, enum a64_insn_bf op,
> +                              int ext, TCGReg rd, TCGReg rn,
> +                              unsigned int rm_immr, unsigned int imms)
>  {
> -    /* using 2-source data processing instructions 0x1ac02000 */
> -    unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
> -    tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
> +    unsigned int insn = ext ? (1 << 31) | (1 << 22) : 0;
> +    insn |= INSN_BF | op;
> +    tcg_out32(s, insn | rm_immr << 16 | imms << 10 | rn << 5 | rd);
>  }
>
>  static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
> -                                unsigned int a, unsigned int b)
> +                                unsigned int immr, unsigned int imms)
>  {
> -    /* Using UBFM 0x53000000 Wd, Wn, a, b */
> -    unsigned int base = ext ? 0xd3400000 : 0x53000000;
> -    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
> +    tcg_out_bf(s, BF_UBFM, ext, rd, rn, immr, imms);
>  }
>
>  static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
> -                                unsigned int a, unsigned int b)
> +                                unsigned int immr, unsigned int imms)
>  {
> -    /* Using SBFM 0x13000000 Wd, Wn, a, b */
> -    unsigned int base = ext ? 0x93400000 : 0x13000000;
> -    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
> +    tcg_out_bf(s, BF_SBFM, ext, rd, rn, immr, imms);
>  }
>
>  static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
> -                                TCGReg rn, TCGReg rm, unsigned int a)
> +                                TCGReg rn, TCGReg rm, unsigned int imms)
>  {
> -    /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
> -    unsigned int base = ext ? 0x93c00000 : 0x13800000;
> -    tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
> +    tcg_out_bf(s, BF_EXTR, ext, rd, rn, rm, imms);
>  }
>
>  static inline void tcg_out_shl(TCGContext *s, int ext,
> @@ -541,11 +669,20 @@ static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm,
>      tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
>  }
>
> +static inline void tcg_out_cs(TCGContext *s, enum a64_insn_cs op, int ext,
> +                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
> +{
> +    unsigned int cond, insn = ext ? (1 << 31) : 0;
> +    cond = tcg_cond_to_aarch64[tcg_invert_cond(c)];
> +
> +    insn |= INSN_CS | op;
> +    tcg_out32(s, insn | rm << 16 | cond << 12 | rn << 5 | rd);
> +}
> +
>  static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c)
>  {
> -    /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
> -    unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
> -    tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
> +    /* Using CSET alias of CSINC Xd, XZR, XZR, cond */
> +    tcg_out_cs(s, CS_CSINC, ext, rd, TCG_REG_XZR, TCG_REG_XZR, c);
>  }
>
>  static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
> @@ -558,7 +695,7 @@ static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
>          tcg_abort();
>      }
>
> -    tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
> +    tcg_out32(s, INSN_B | (offset & 0x03ffffff));
>  }
>
>  static inline void tcg_out_goto_noaddr(TCGContext *s)
> @@ -569,7 +706,7 @@ static inline void tcg_out_goto_noaddr(TCGContext *s)
>         Mask away possible garbage in the high bits for the first translation,
>         while keeping the offset bits for retranslation. */
>      uint32_t insn;
> -    insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
> +    insn = (tcg_in32(s) & 0x03ffffff) | INSN_B;
>      tcg_out32(s, insn);
>  }
>
> @@ -578,7 +715,7 @@ static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
>      /* see comments in tcg_out_goto_noaddr */
>      uint32_t insn;
>      insn = tcg_in32(s) & (0x07ffff << 5);
> -    insn |= 0x54000000 | tcg_cond_to_aarch64[c];
> +    insn |= INSN_B_COND | tcg_cond_to_aarch64[c];
>      tcg_out32(s, insn);
>  }
>
> @@ -594,17 +731,17 @@ static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
>      }
>
>      offset &= 0x7ffff;
> -    tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
> +    tcg_out32(s, INSN_B_COND | tcg_cond_to_aarch64[c] | offset << 5);
>  }
>
>  static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
>  {
> -    tcg_out32(s, 0xd63f0000 | reg << 5);
> +    tcg_out32(s, INSN_BLR | reg << 5);
>  }
>
>  static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
>  {
> -    tcg_out32(s, 0xd61f0000 | reg << 5);
> +    tcg_out32(s, INSN_BR | reg << 5);
>  }
>
>  static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
> @@ -617,10 +754,16 @@ static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
>          tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
>          tcg_out_callr(s, TCG_REG_TMP);
>      } else {
> -        tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
> +        tcg_out32(s, INSN_BL | (offset & 0x03ffffff));
>      }
>  }
>
> +static inline void tcg_out_ret(TCGContext *s)
> +{
> +    /* emit RET { LR } */
> +    tcg_out32(s, INSN_RET | TCG_REG_LR << 5);
> +}
> +
>  /* encode a logical immediate, mapping user parameter
>     M=set bits pattern length to S=M-1 */
>  static inline unsigned int
> @@ -630,35 +773,28 @@ aarch64_limm(unsigned int m, unsigned int r)
>      return r << 16 | (m - 1) << 10;
>  }
>
> -/* test a register against an immediate bit pattern made of
> -   M set bits rotated right by R.
> +/* perform a logical operation on a register against an immediate bit pattern
> +   made of M set bits rotated right by R.
>     Examples:
>     to test a 32/64 reg against 0x00000007, pass M = 3,  R = 0.
>     to test a 32/64 reg against 0x000000ff, pass M = 8,  R = 0.
>     to test a 32bit reg against 0xff000000, pass M = 8,  R = 8.
>     to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
>   */
> -static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
> -                               unsigned int m, unsigned int r)
> +static inline void tcg_out_imm_log(TCGContext *s, enum a64_insn_imm_log op,
> +                                   int ext, TCGReg rd, TCGReg rn,
> +                                   unsigned int m, unsigned int r)
>  {
> -    /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
> -    unsigned int base = ext ? 0xf240001f : 0x7200001f;
> -    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
> +    unsigned int insn = ext ? (1 << 31) | (1 << 22) : 0;
> +    insn |= INSN_IMM | op;
> +    tcg_out32(s, insn | aarch64_limm(m, r) | rn << 5 | rd);
>  }
>
> -/* and a register with a bit pattern, similarly to TST, no flags change */
> +/* and a register with a bit pattern, without changing flags */
>  static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
>                                  unsigned int m, unsigned int r)
>  {
> -    /* using AND 0x12000000 */
> -    unsigned int base = ext ? 0x92400000 : 0x12000000;
> -    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
> -}
> -
> -static inline void tcg_out_ret(TCGContext *s)
> -{
> -    /* emit RET { LR } */
> -    tcg_out32(s, 0xd65f03c0);
> +    tcg_out_imm_log(s, IMM_AND, ext, rd, rn, m, r);
>  }
>
>  void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
> @@ -703,16 +839,16 @@ static inline void tcg_out_goto_label_cond(TCGContext *s,
>
>  static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
>  {
> -    /* using REV 0x5ac00800 */
> -    unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
> -    tcg_out32(s, base | rm << 5 | rd);
> +    unsigned int insn = ext ? (1 << 31) | (1 << 10) : 0;
> +    insn |= INSN_REV;
> +    tcg_out32(s, insn | rm << 5 | rd);
>  }
>
>  static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
>  {
> -    /* using REV16 0x5ac00400 */
> -    unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
> -    tcg_out32(s, base | rm << 5 | rd);
> +    unsigned int insn = ext ? (1 << 31) : 0;
> +    insn |= INSN_REV16;
> +    tcg_out32(s, insn | rm << 5 | rd);
>  }
>
>  static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits,
> @@ -733,49 +869,9 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits,
>      tcg_out_ubfm(s, 0, rd, rn, 0, bits);
>  }
>
> -static inline void tcg_out_addi(TCGContext *s, int ext,
> -                                TCGReg rd, TCGReg rn, unsigned int aimm)
> -{
> -    /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
> -    /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
> -    unsigned int base = ext ? 0x91000000 : 0x11000000;
> -
> -    if (aimm <= 0xfff) {
> -        aimm <<= 10;
> -    } else {
> -        /* we can only shift left by 12, on assert we cannot represent */
> -        assert(!(aimm & 0xfff));
> -        assert(aimm <= 0xfff000);
> -        base |= 1 << 22; /* apply LSL 12 */
> -        aimm >>= 2;
> -    }
> -
> -    tcg_out32(s, base | aimm | (rn << 5) | rd);
> -}
> -
> -static inline void tcg_out_subi(TCGContext *s, int ext,
> -                                TCGReg rd, TCGReg rn, unsigned int aimm)
> -{
> -    /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
> -    /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
> -    unsigned int base = ext ? 0xd1000000 : 0x51000000;
> -
> -    if (aimm <= 0xfff) {
> -        aimm <<= 10;
> -    } else {
> -        /* we can only shift left by 12, on assert we cannot represent */
> -        assert(!(aimm & 0xfff));
> -        assert(aimm <= 0xfff000);
> -        base |= 1 << 22; /* apply LSL 12 */
> -        aimm >>= 2;
> -    }
> -
> -    tcg_out32(s, base | aimm | (rn << 5) | rd);
> -}
> -
>  static inline void tcg_out_nop(TCGContext *s)
>  {
> -    tcg_out32(s, 0xd503201f);
> +    tcg_out32(s, INSN_NOP);
>  }
>
>  #ifdef CONFIG_SOFTMMU
> @@ -1047,44 +1143,47 @@ static uint8_t *tb_ret_addr;
>     ret
>  */
>
> -/* push r1 and r2, and alloc stack space for a total of
> -   alloc_n elements (1 element=16 bytes, must be between 1 and 31. */
> -static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
> -                                     TCGReg r1, TCGReg r2, int alloc_n)
> +/* push r1 and r2, and alloc stack space for a total of alloc_npairs
> +   (1 pair=16 bytes, must be between 1 and 31. */
> +static inline void tcg_out_push_pair(TCGContext *s, TCGReg base,
> +                                     TCGReg r1, TCGReg r2,
> +                                     unsigned int alloc_npairs)
>  {
> -    /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
> -       | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
> -    assert(alloc_n > 0 && alloc_n < 0x20);
> -    alloc_n = (-alloc_n) & 0x3f;
> -    tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
> +    int scaled_offset;
> +    assert(alloc_npairs > 0 && alloc_npairs < 0x20);
> +    scaled_offset = -(alloc_npairs * 2); /* scaled offset is in 8-byte elements */
> +    tcg_out_ldstp(s, 1, LDSTP_ST, LDSTP_PRE, r1, r2, base, scaled_offset);
>  }
>
>  /* dealloc stack space for a total of alloc_n elements and pop r1, r2.  */
> -static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
> -                                    TCGReg r1, TCGReg r2, int alloc_n)
> +static inline void tcg_out_pop_pair(TCGContext *s, TCGReg base,
> +                                    TCGReg r1, TCGReg r2,
> +                                    unsigned int alloc_npairs)
>  {
> -    /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
> -       | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
> -    assert(alloc_n > 0 && alloc_n < 0x20);
> -    tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
> +    int scaled_offset;
> +    assert(alloc_npairs > 0 && alloc_npairs < 0x20);
> +    scaled_offset = (alloc_npairs * 2); /* scaled offset is in 8-byte elements */
> +    tcg_out_ldstp(s, 1, LDSTP_LD, LDSTP_POST, r1, r2, base, scaled_offset);
>  }
>
> -static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
> -                                      TCGReg r1, TCGReg r2, int idx)
> +static inline void tcg_out_store_pair(TCGContext *s, TCGReg base,
> +                                      TCGReg r1, TCGReg r2,
> +                                      unsigned int pair_idx)
>  {
> -    /* using register pair offset simm7 STP 0x29000000 | (ext)
> -       | idx << 16 | r2 << 10 | addr << 5 | r1 */
> -    assert(idx > 0 && idx < 0x20);
> -    tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
> +    int scaled_offset;
> +    assert(pair_idx > 0 && pair_idx < 0x20);
> +    scaled_offset = (pair_idx * 2); /* scaled offset is in 8-byte elements */
> +    tcg_out_ldstp(s, 1, LDSTP_ST, LDSTP_NOIDX, r1, r2, base, scaled_offset);
>  }
>
> -static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
> -                                     TCGReg r1, TCGReg r2, int idx)
> +static inline void tcg_out_load_pair(TCGContext *s, TCGReg base,
> +                                     TCGReg r1, TCGReg r2,
> +                                     unsigned int pair_idx)
>  {
> -    /* using register pair offset simm7 LDP 0x29400000 | (ext)
> -       | idx << 16 | r2 << 10 | addr << 5 | r1 */
> -    assert(idx > 0 && idx < 0x20);
> -    tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
> +    int scaled_offset;
> +    assert(pair_idx > 0 && pair_idx < 0x20);
> +    scaled_offset = (pair_idx * 2); /* scaled offset is in 8-byte elements */
> +    tcg_out_ldstp(s, 1, LDSTP_LD, LDSTP_NOIDX, r1, r2, base, scaled_offset);
>  }
>
>  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> @@ -1193,7 +1292,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_mul_i64:
>          ext = 1; /* fall through */
>      case INDEX_op_mul_i32:
> -        tcg_out_mul(s, ext, args[0], args[1], args[2]);
> +        tcg_out_mul(s, MUL_MADD, ext, args[0], args[1], TCG_REG_XZR, args[2]);
>          break;
>
>      case INDEX_op_shl_i64:
> @@ -1202,7 +1301,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (const_args[2]) {    /* LSL / UBFM Wd, Wn, (32 - m) */
>              tcg_out_shl(s, ext, args[0], args[1], args[2]);
>          } else {                /* LSL / LSLV */
> -            tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
> +            tcg_out_shift(s, SHIFT_SHL, ext, args[0], args[1], args[2]);
>          }
>          break;
>
> @@ -1212,7 +1311,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (const_args[2]) {    /* LSR / UBFM Wd, Wn, m, 31 */
>              tcg_out_shr(s, ext, args[0], args[1], args[2]);
>          } else {                /* LSR / LSRV */
> -            tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
> +            tcg_out_shift(s, SHIFT_SHR, ext, args[0], args[1], args[2]);
>          }
>          break;
>
> @@ -1222,7 +1321,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (const_args[2]) {    /* ASR / SBFM Wd, Wn, m, 31 */
>              tcg_out_sar(s, ext, args[0], args[1], args[2]);
>          } else {                /* ASR / ASRV */
> -            tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
> +            tcg_out_shift(s, SHIFT_SAR, ext, args[0], args[1], args[2]);
>          }
>          break;
>
> @@ -1232,7 +1331,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, m */
>              tcg_out_rotr(s, ext, args[0], args[1], args[2]);
>          } else {                /* ROR / RORV */
> -            tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
> +            tcg_out_shift(s, SHIFT_ROR, ext, args[0], args[1], args[2]);
>          }
>          break;
>
> @@ -1244,8 +1343,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          } else {
>              tcg_out_arith(s, ARITH_SUB, 0,
>                            TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
> -            tcg_out_shiftrot_reg(s, SRR_ROR, ext,
> -                                 args[0], args[1], TCG_REG_TMP);
> +            tcg_out_shift(s, SHIFT_ROR, ext, args[0], args[1], TCG_REG_TMP);
>          }
>          break;
>
> @@ -1474,7 +1572,7 @@ static void tcg_target_init(TCGContext *s)
>  static void tcg_target_qemu_prologue(TCGContext *s)
>  {
>      /* NB: frame sizes are in 16 byte stack units! */
> -    int frame_size_callee_saved, frame_size_tcg_locals;
> +    unsigned int frame_size_callee_saved, frame_size_tcg_locals;
>      TCGReg r;
>
>      /* save pairs             (FP, LR) and (X19, X20) .. (X27, X28) */
> --
> 1.8.5.3
>
Richard Henderson Feb. 28, 2014, 4:21 p.m. UTC | #2
On 02/28/2014 12:33 AM, Claudio Fontana wrote:
> ping?
> 
> Richard, do you prefer to spin your polishing from last year instead?

I do prefer my patch set over this.

>> +    INSN_IMM       = 0x10000000, /* 3.4.1, 3.4.4  - add/sub and logical */
>> +    INSN_SHIFT     = 0x1ac02000, /* 3.5.8 - Data proc (2 source) */
...
>> +enum a64_insn_arith {
>> +    ARITH_ADD  = 0x01 << 24,
>> +    ARITH_ADDS = 0x21 << 24,
>> +    ARITH_SUB  = 0x41 << 24,
>> +    ARITH_SUBS = 0x61 << 24,
>> +
>> +    ARITH_AND  = 0x00 << 24,
>> +    ARITH_OR   = 0x20 << 24,
>> +    ARITH_XOR  = 0x40 << 24,
>> +    ARITH_ANDS = 0x60 << 24,
>>  };
...
>> +enum a64_insn_imm_arith { /* 3.4.1 */
>> +    IMM_ADD  = 0x01 << 24,
>> +    IMM_ADDS = 0x21 << 24,
>> +    IMM_SUB  = 0x41 << 24,
>> +    IMM_SUBS = 0x61 << 24
...
>> +enum a64_insn_imm_log { /* 3.4.4 */
>> +    IMM_AND  = 0x02 << 24,
>> +    IMM_OR   = 0x22 << 24,
>> +    IMM_XOR  = 0x42 << 24,
>> +    IMM_ANDS = 0x62 << 24,
>> +};

In particular, I'm not keen on these opcode / subopcode splits. They just make
the source more confusing to read, and in my experience aren't helpful in
practice. It's just as easy to put them together into e.g. a single "ADDI" symbol.


r~
diff mbox

Patch

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 04d7ae3..580f340 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -186,39 +186,121 @@  static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
     [TCG_COND_LEU] = COND_LS,
 };
 
-/* opcodes for LDR / STR instructions with base + simm9 addressing */
-enum aarch64_ldst_op_data { /* size of the data moved */
-    LDST_8 = 0x38,
-    LDST_16 = 0x78,
-    LDST_32 = 0xb8,
-    LDST_64 = 0xf8,
+/* Instructions and instruction classes.
+ * Documentation References are relative to ARM DDI 0487A.a
+ * (C3: A64 Instruction Set Encoding).
+ * Instructions are either directly encoded in this list,
+ * missing only the parameters, or are orred with instruction
+ * subclasses described in enums below.
+ */
+enum a64_insn {
+    INSN_B_COND    = 0x54000000, /* 3.2.2 - Conditional Branch imm */
+    INSN_B         = 0x14000000, /* 3.2.6 - Unconditional branch imm (B) */
+    INSN_BL        = 0x94000000, /* 3.2.6 - (BL) */
+    INSN_BR        = 0xd61f0000, /* 3.2.7 - Unconditional branch reg (BR) */
+    INSN_BLR       = 0xd63f0000, /* 3.2.7 - (BLR) */
+    INSN_RET       = 0xd65f0000, /* 3.2.7 - (RET) */
+    INSN_LDST      = 0x38000000, /* 3.3 - Load and Stores (9/12/register) */
+    INSN_LDSTP     = 0x28000000, /* 3.3.14, 3.3.15, 3.3.16 */
+    INSN_IMM       = 0x10000000, /* 3.4.1, 3.4.4  - add/sub and logical */
+    INSN_BF        = 0x13000000, /* 3.4.2, 3.4.3 - bitfield, extract */
+    INSN_MOVI      = 0x12800000, /* 3.4.5 - Move Wide (Immediate) */
+    INSN_ARITH     = 0x0a000000, /* 3.5.2, 3.5.10 - add/sub and logical */
+    INSN_CS        = 0x1a800000, /* 3.5.6 - Conditional select */
+    INSN_REV       = 0x5ac00800, /* 3.5.7 - Data proc (1 source) (REV/REV32) */
+    INSN_REV16     = 0x5ac00400, /* 3.5.7 - (REV16) */
+    INSN_SHIFT     = 0x1ac02000, /* 3.5.8 - Data proc (2 source) */
+    INSN_MUL       = 0x1b000000, /* 3.5.9 - Data proc (3 source) */
+    INSN_NOP       = 0xd503201f, /* 3.2.4 - HINT / 5.6.139 NOP */
+};
+
+/* enums for LDR / STR instructions with base + simm9 addressing */
+enum a64_ldst_data { /* corresponds to s_bits */
+    LDST_8  = 0x0 << 30,
+    LDST_16 = 0x1 << 30,
+    LDST_32 = 0x2 << 30,
+    LDST_64 = 0x3 << 30
+};
+enum a64_ldst_type {    /* type of operation */
+    LDST_ST     = 0x0 << 20,     /* store */
+    LDST_LD     = 0x4 << 20,     /* load */
+    LDST_LD_S_X = 0x8 << 20, /* load and sign-extend into Xt */
+    LDST_LD_S_W = 0xc << 20, /* load and sign-extend into Wt */
+};
+enum a64_ldst_addr {    /* addressing mode */
+    LDST_9  = 0,
+    LDST_12 = (1 << 24),
+    LDST_R  = 0x00206800
 };
-enum aarch64_ldst_op_type { /* type of operation */
-    LDST_ST = 0x0,    /* store */
-    LDST_LD = 0x4,    /* load */
-    LDST_LD_S_X = 0x8,  /* load and sign-extend into Xt */
-    LDST_LD_S_W = 0xc,  /* load and sign-extend into Wt */
+enum a64_ldstp_type {
+    LDSTP_ST = 0x0 << 22,
+    LDSTP_LD = 0x1 << 22
+};
+enum a64_ldstp_addr {   /* addressing mode */
+    LDSTP_POST  = 1 << 23, /* post-index writeback */
+    LDSTP_NOIDX = 2 << 23, /* no writeback, normal imm7 signed offset */
+    LDSTP_PRE   = 3 << 23, /* pre-index writeback */
+};
+
+enum a64_insn_arith {
+    ARITH_ADD  = 0x01 << 24,
+    ARITH_ADDS = 0x21 << 24,
+    ARITH_SUB  = 0x41 << 24,
+    ARITH_SUBS = 0x61 << 24,
+
+    ARITH_AND  = 0x00 << 24,
+    ARITH_OR   = 0x20 << 24,
+    ARITH_XOR  = 0x40 << 24,
+    ARITH_ANDS = 0x60 << 24,
 };
 
-enum aarch64_arith_opc {
-    ARITH_AND = 0x0a,
-    ARITH_ADD = 0x0b,
-    ARITH_OR = 0x2a,
-    ARITH_ADDS = 0x2b,
-    ARITH_XOR = 0x4a,
-    ARITH_SUB = 0x4b,
-    ARITH_ANDS = 0x6a,
-    ARITH_SUBS = 0x6b,
+enum a64_insn_movi { /* C3.4.5 decode field 'opc' */
+    MOVZ = 0x02 << 29,
+    MOVK = 0x03 << 29,
 };
 
-enum aarch64_srr_opc {
-    SRR_SHL = 0x0,
-    SRR_SHR = 0x4,
-    SRR_SAR = 0x8,
-    SRR_ROR = 0xc
+enum a64_insn_imm_arith { /* 3.4.1 */
+    IMM_ADD  = 0x01 << 24,
+    IMM_ADDS = 0x21 << 24,
+    IMM_SUB  = 0x41 << 24,
+    IMM_SUBS = 0x61 << 24
 };
 
-static inline enum aarch64_ldst_op_data
+enum a64_insn_imm_log { /* 3.4.4 */
+    IMM_AND  = 0x02 << 24,
+    IMM_OR   = 0x22 << 24,
+    IMM_XOR  = 0x42 << 24,
+    IMM_ANDS = 0x62 << 24,
+};
+
+enum a64_insn_mul { /* 3.5.9 */
+    MUL_MADD = 0x0000,
+    MUL_MSUB = 0x8000,
+};
+
+enum a64_insn_shift {
+    SHIFT_SHL = 0x0 << 10,
+    SHIFT_SHR = 0x1 << 10,
+    SHIFT_SAR = 0x2 << 10,
+    SHIFT_ROR = 0x3 << 10
+};
+
+enum a64_insn_bf {
+    BF_SBFM = 0 << 29,
+    BF_BFM  = 1 << 29,
+    BF_UBFM = 2 << 29,
+
+    BF_EXTR = 1 << 23,
+};
+
+enum a64_insn_cs { /* 3.5.6 */
+    CS_CSEL  = 0,
+    CS_CSINC = 1 << 10,
+    CS_CSINV = 1 << 30,
+    CS_CSNEG = 1 << 30 | 1 << 10
+};
+
+static inline enum a64_ldst_data
 aarch64_ldst_get_data(TCGOpcode tcg_op)
 {
     switch (tcg_op) {
@@ -254,7 +336,7 @@  aarch64_ldst_get_data(TCGOpcode tcg_op)
     }
 }
 
-static inline enum aarch64_ldst_op_type
+static inline enum a64_ldst_type
 aarch64_ldst_get_type(TCGOpcode tcg_op)
 {
     switch (tcg_op) {
@@ -297,8 +379,8 @@  static inline uint32_t tcg_in32(TCGContext *s)
 }
 
 static inline void tcg_out_ldst_9(TCGContext *s,
-                                  enum aarch64_ldst_op_data op_data,
-                                  enum aarch64_ldst_op_type op_type,
+                                  enum a64_ldst_data op_data,
+                                  enum a64_ldst_type op_type,
                                   TCGReg rd, TCGReg rn, tcg_target_long offset)
 {
     /* use LDUR with BASE register with 9bit signed unscaled offset */
@@ -306,42 +388,123 @@  static inline void tcg_out_ldst_9(TCGContext *s,
 
     if (offset < 0) {
         off = (256 + offset);
-        mod = 0x1;
+        mod = 0x1 << 20;
     } else {
         off = offset;
-        mod = 0x0;
+        mod = 0x0 << 20;
     }
 
-    mod |= op_type;
-    tcg_out32(s, op_data << 24 | mod << 20 | off << 12 | rn << 5 | rd);
+    tcg_out32(s, INSN_LDST | LDST_9 | op_data | op_type | mod | off << 12
+              | rn << 5 | rd);
 }
 
 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
 static inline void tcg_out_ldst_12(TCGContext *s,
-                                   enum aarch64_ldst_op_data op_data,
-                                   enum aarch64_ldst_op_type op_type,
+                                   enum a64_ldst_data op_data,
+                                   enum a64_ldst_type op_type,
                                    TCGReg rd, TCGReg rn,
                                    tcg_target_ulong scaled_uimm)
 {
-    tcg_out32(s, (op_data | 1) << 24
-              | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
+    tcg_out32(s, INSN_LDST | LDST_12 | op_data | op_type | scaled_uimm << 10
+              | rn << 5 | rd);
+}
+
+static inline void tcg_out_ldst_r(TCGContext *s,
+                                  enum a64_ldst_data op_data,
+                                  enum a64_ldst_type op_type,
+                                  TCGReg rd, TCGReg base, TCGReg regoff)
+{
+    /* load from memory to register using base + 64bit register offset */
+    tcg_out32(s, INSN_LDST | LDST_R | op_data | op_type | regoff << 16
+              | base << 5 | rd);
+}
+
+/* tcg_out_ldstp expects a scaled signed immediate offset */
+static inline void tcg_out_ldstp(TCGContext *s, int ext,
+                                 enum a64_ldstp_type op_type,
+                                 enum a64_ldstp_addr idx,
+                                 TCGReg rt, TCGReg rt2, TCGReg rn,
+                                 int simm7)
+{
+    unsigned int insn = ext ? (1 << 31) : 0;
+    simm7 &= 0x7f;
+    insn |= INSN_LDSTP | op_type | idx;
+    tcg_out32(s, insn | simm7 << 15 | rt2 << 10 | rn << 5 | rt);
+}
+
+static inline void tcg_out_arith(TCGContext *s, enum a64_insn_arith opc,
+                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
+                                 int shift_imm)
+{
+    /* Using shifted register arithmetic operations */
+    /* if extended register operation (64bit) just set bit 31. */
+    unsigned int shift, insn = ext ? (1 << 31) : 0;
+    insn |= INSN_ARITH | opc;
+
+    if (shift_imm == 0) {
+        shift = 0;
+    } else if (shift_imm > 0) {
+        shift = shift_imm << 10 | 1 << 22;
+    } else /* (shift_imm < 0) */ {
+        shift = (-shift_imm) << 10;
+    }
+    tcg_out32(s, insn | rm << 16 | shift | rn << 5 | rd);
+}
+
+static inline void tcg_out_imm_arith(TCGContext *s, enum a64_insn_imm_arith op,
+                                     int ext, TCGReg rd, TCGReg rn,
+                                     unsigned int aimm)
+{
+    /* add/sub immediate unsigned 12bit value (with LSL 0 or 12) */
+    /* if extended register operation (64bit) just set bit 31. */
+    unsigned int insn = ext ? (1 << 31) : 0;
+    insn |= INSN_IMM | op;
+
+    if (aimm <= 0xfff) {
+        aimm <<= 10;
+    } else {
+        /* we can only shift left by 12, on assert we cannot represent */
+        assert(!(aimm & 0xfff));
+        assert(aimm <= 0xfff000);
+        insn |= 1 << 22; /* apply LSL 12 */
+        aimm >>= 2;
+    }
+
+    tcg_out32(s, insn | aimm | (rn << 5) | rd);
+}
+
+static inline void tcg_out_addi(TCGContext *s, int ext,
+                                TCGReg rd, TCGReg rn, unsigned int aimm)
+{
+    tcg_out_imm_arith(s, IMM_ADD, ext, rd, rn, aimm);
+}
+
+static inline void tcg_out_subi(TCGContext *s, int ext,
+                                TCGReg rd, TCGReg rn, unsigned int aimm)
+{
+    tcg_out_imm_arith(s, IMM_SUB, ext, rd, rn, aimm);
 }
 
 static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src)
 {
-    /* register to register move using MOV (shifted register with no shift) */
-    /* using MOV 0x2a0003e0 | (shift).. */
-    unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
-    tcg_out32(s, base | src << 16 | rd);
+    /* register to register move using ORR (shifted register with no shift) */
+    tcg_out_arith(s, ARITH_OR, ext, rd, TCG_REG_XZR, src, 0);
+}
+
+static inline void tcg_out_movr_sp(TCGContext *s, int ext,
+                                   TCGReg rd, TCGReg rn)
+{
+    /* reg to reg move using immediate add, useful to move to/from SP */
+    tcg_out_imm_arith(s, IMM_ADD, ext, rd, rn, 0);
 }
 
 static inline void tcg_out_movi_aux(TCGContext *s,
                                     TCGReg rd, uint64_t value)
 {
-    uint32_t half, base, shift, movk = 0;
+    uint32_t half, insn, shift;
     /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
-    /* using MOVZ 0x52800000 | extended reg.. */
-    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
+    insn = (value > 0xffffffff) ? 1 << 31 : 0;
+    insn |= INSN_MOVI | MOVZ;
     /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
        first MOVZ with the half-word immediate skipping the zeros, with a shift
        (LSL) equal to this number. Then morph all next instructions into MOVKs.
@@ -351,8 +514,8 @@  static inline void tcg_out_movi_aux(TCGContext *s,
     do {
         shift = ctz64(value) & (63 & -16);
         half = (value >> shift) & 0xffff;
-        tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
-        movk = 0x20000000; /* morph next MOVZs into MOVKs */
+        tcg_out32(s, insn | shift << 17 | half << 5 | rd);
+        insn |= MOVK; /* change MOVZs into MOVKs */
         value &= ~(0xffffUL << shift);
     } while (value);
 }
@@ -367,21 +530,9 @@  static inline void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
-static inline void tcg_out_ldst_r(TCGContext *s,
-                                  enum aarch64_ldst_op_data op_data,
-                                  enum aarch64_ldst_op_type op_type,
-                                  TCGReg rd, TCGReg base, TCGReg regoff)
-{
-    /* load from memory to register using base + 64bit register offset */
-    /* using f.e. STR Wt, [Xn, Xm] 0xb8600800|(regoff << 16)|(base << 5)|rd */
-    /* the 0x6000 is for the "no extend field" */
-    tcg_out32(s, 0x00206800
-              | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
-}
-
 /* solve the whole ldst problem */
-static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
-                                enum aarch64_ldst_op_type type,
+static inline void tcg_out_ldst(TCGContext *s, enum a64_ldst_data data,
+                                enum a64_ldst_type type,
                                 TCGReg rd, TCGReg rn, tcg_target_long offset)
 {
     if (offset >= -256 && offset < 256) {
@@ -392,7 +543,7 @@  static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
     if (offset >= 256) {
         /* if the offset is naturally aligned and in range,
            then we can use the scaled uimm12 encoding */
-        unsigned int s_bits = data >> 6;
+        unsigned int s_bits = data >> 30;
         if (!(offset & ((1 << s_bits) - 1))) {
             tcg_target_ulong scaled_uimm = offset >> s_bits;
             if (scaled_uimm <= 0xfff) {
@@ -407,14 +558,6 @@  static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
     tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
 }
 
-/* mov alias implemented with add immediate, useful to move to/from SP */
-static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn)
-{
-    /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
-    unsigned int base = ext ? 0x91000000 : 0x11000000;
-    tcg_out32(s, base | rn << 5 | rd);
-}
-
 static inline void tcg_out_mov(TCGContext *s,
                                TCGType type, TCGReg ret, TCGReg arg)
 {
@@ -437,62 +580,47 @@  static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                  arg, arg1, arg2);
 }
 
-static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
-                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
-                                 int shift_imm)
+static inline void tcg_out_mul(TCGContext *s, enum a64_insn_mul op, int ext,
+                               TCGReg rd, TCGReg rn, TCGReg ra, TCGReg rm)
 {
-    /* Using shifted register arithmetic operations */
-    /* if extended register operation (64bit) just OR with 0x80 << 24 */
-    unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24;
-    if (shift_imm == 0) {
-        shift = 0;
-    } else if (shift_imm > 0) {
-        shift = shift_imm << 10 | 1 << 22;
-    } else /* (shift_imm < 0) */ {
-        shift = (-shift_imm) << 10;
-    }
-    tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
+    unsigned int insn = ext ? (1 << 31) : 0;
+    insn |= INSN_MUL | op;
+    tcg_out32(s, insn | rm << 16 | ra << 10 | rn << 5 | rd);
 }
 
-static inline void tcg_out_mul(TCGContext *s, int ext,
-                               TCGReg rd, TCGReg rn, TCGReg rm)
+static inline void tcg_out_shift(TCGContext *s, enum a64_insn_shift shift_type,
+                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm)
 {
-    /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
-    unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
-    tcg_out32(s, base | rm << 16 | rn << 5 | rd);
+    unsigned int insn = ext ? (1 << 31) : 0;
+    insn |= INSN_SHIFT | shift_type;
+    tcg_out32(s, insn | rm << 16 | rn << 5 | rd);
 }
 
-static inline void tcg_out_shiftrot_reg(TCGContext *s,
-                                        enum aarch64_srr_opc opc, int ext,
-                                        TCGReg rd, TCGReg rn, TCGReg rm)
+static inline void tcg_out_bf(TCGContext *s, enum a64_insn_bf op,
+                              int ext, TCGReg rd, TCGReg rn,
+                              unsigned int rm_immr, unsigned int imms)
 {
-    /* using 2-source data processing instructions 0x1ac02000 */
-    unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
-    tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
+    unsigned int insn = ext ? (1 << 31) | (1 << 22) : 0;
+    insn |= INSN_BF | op;
+    tcg_out32(s, insn | rm_immr << 16 | imms << 10 | rn << 5 | rd);
 }
 
 static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
-                                unsigned int a, unsigned int b)
+                                unsigned int immr, unsigned int imms)
 {
-    /* Using UBFM 0x53000000 Wd, Wn, a, b */
-    unsigned int base = ext ? 0xd3400000 : 0x53000000;
-    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
+    tcg_out_bf(s, BF_UBFM, ext, rd, rn, immr, imms);
 }
 
 static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
-                                unsigned int a, unsigned int b)
+                                unsigned int immr, unsigned int imms)
 {
-    /* Using SBFM 0x13000000 Wd, Wn, a, b */
-    unsigned int base = ext ? 0x93400000 : 0x13000000;
-    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
+    tcg_out_bf(s, BF_SBFM, ext, rd, rn, immr, imms);
 }
 
 static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
-                                TCGReg rn, TCGReg rm, unsigned int a)
+                                TCGReg rn, TCGReg rm, unsigned int imms)
 {
-    /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
-    unsigned int base = ext ? 0x93c00000 : 0x13800000;
-    tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
+    tcg_out_bf(s, BF_EXTR, ext, rd, rn, rm, imms);
 }
 
 static inline void tcg_out_shl(TCGContext *s, int ext,
@@ -541,11 +669,20 @@  static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm,
     tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
 }
 
+static inline void tcg_out_cs(TCGContext *s, enum a64_insn_cs op, int ext,
+                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
+{
+    unsigned int cond, insn = ext ? (1 << 31) : 0;
+    cond = tcg_cond_to_aarch64[tcg_invert_cond(c)];
+
+    insn |= INSN_CS | op;
+    tcg_out32(s, insn | rm << 16 | cond << 12 | rn << 5 | rd);
+}
+
 static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c)
 {
-    /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
-    unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
-    tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
+    /* Using CSET alias of CSINC Xd, XZR, XZR, cond */
+    tcg_out_cs(s, CS_CSINC, ext, rd, TCG_REG_XZR, TCG_REG_XZR, c);
 }
 
 static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
@@ -558,7 +695,7 @@  static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
         tcg_abort();
     }
 
-    tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
+    tcg_out32(s, INSN_B | (offset & 0x03ffffff));
 }
 
 static inline void tcg_out_goto_noaddr(TCGContext *s)
@@ -569,7 +706,7 @@  static inline void tcg_out_goto_noaddr(TCGContext *s)
        Mask away possible garbage in the high bits for the first translation,
        while keeping the offset bits for retranslation. */
     uint32_t insn;
-    insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
+    insn = (tcg_in32(s) & 0x03ffffff) | INSN_B;
     tcg_out32(s, insn);
 }
 
@@ -578,7 +715,7 @@  static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
     /* see comments in tcg_out_goto_noaddr */
     uint32_t insn;
     insn = tcg_in32(s) & (0x07ffff << 5);
-    insn |= 0x54000000 | tcg_cond_to_aarch64[c];
+    insn |= INSN_B_COND | tcg_cond_to_aarch64[c];
     tcg_out32(s, insn);
 }
 
@@ -594,17 +731,17 @@  static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
     }
 
     offset &= 0x7ffff;
-    tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
+    tcg_out32(s, INSN_B_COND | tcg_cond_to_aarch64[c] | offset << 5);
 }
 
 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
 {
-    tcg_out32(s, 0xd63f0000 | reg << 5);
+    tcg_out32(s, INSN_BLR | reg << 5);
 }
 
 static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
 {
-    tcg_out32(s, 0xd61f0000 | reg << 5);
+    tcg_out32(s, INSN_BR | reg << 5);
 }
 
 static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
@@ -617,10 +754,16 @@  static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
         tcg_out_callr(s, TCG_REG_TMP);
     } else {
-        tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
+        tcg_out32(s, INSN_BL | (offset & 0x03ffffff));
     }
 }
 
+static inline void tcg_out_ret(TCGContext *s)
+{
+    /* emit RET { LR } */
+    tcg_out32(s, INSN_RET | TCG_REG_LR << 5);
+}
+
 /* encode a logical immediate, mapping user parameter
    M=set bits pattern length to S=M-1 */
 static inline unsigned int
@@ -630,35 +773,28 @@  aarch64_limm(unsigned int m, unsigned int r)
     return r << 16 | (m - 1) << 10;
 }
 
-/* test a register against an immediate bit pattern made of
-   M set bits rotated right by R.
+/* perform a logical operation on a register against an immediate bit pattern
+   made of M set bits rotated right by R.
    Examples:
    to test a 32/64 reg against 0x00000007, pass M = 3,  R = 0.
    to test a 32/64 reg against 0x000000ff, pass M = 8,  R = 0.
    to test a 32bit reg against 0xff000000, pass M = 8,  R = 8.
    to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
  */
-static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
-                               unsigned int m, unsigned int r)
+static inline void tcg_out_imm_log(TCGContext *s, enum a64_insn_imm_log op,
+                                   int ext, TCGReg rd, TCGReg rn,
+                                   unsigned int m, unsigned int r)
 {
-    /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
-    unsigned int base = ext ? 0xf240001f : 0x7200001f;
-    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
+    unsigned int insn = ext ? (1 << 31) | (1 << 22) : 0;
+    insn |= INSN_IMM | op;
+    tcg_out32(s, insn | aarch64_limm(m, r) | rn << 5 | rd);
 }
 
-/* and a register with a bit pattern, similarly to TST, no flags change */
+/* and a register with a bit pattern, without changing flags */
 static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
                                 unsigned int m, unsigned int r)
 {
-    /* using AND 0x12000000 */
-    unsigned int base = ext ? 0x92400000 : 0x12000000;
-    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
-}
-
-static inline void tcg_out_ret(TCGContext *s)
-{
-    /* emit RET { LR } */
-    tcg_out32(s, 0xd65f03c0);
+    tcg_out_imm_log(s, IMM_AND, ext, rd, rn, m, r);
 }
 
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
@@ -703,16 +839,16 @@  static inline void tcg_out_goto_label_cond(TCGContext *s,
 
 static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
 {
-    /* using REV 0x5ac00800 */
-    unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
-    tcg_out32(s, base | rm << 5 | rd);
+    unsigned int insn = ext ? (1 << 31) | (1 << 10) : 0;
+    insn |= INSN_REV;
+    tcg_out32(s, insn | rm << 5 | rd);
 }
 
 static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
 {
-    /* using REV16 0x5ac00400 */
-    unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
-    tcg_out32(s, base | rm << 5 | rd);
+    unsigned int insn = ext ? (1 << 31) : 0;
+    insn |= INSN_REV16;
+    tcg_out32(s, insn | rm << 5 | rd);
 }
 
 static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits,
@@ -733,49 +869,9 @@  static inline void tcg_out_uxt(TCGContext *s, int s_bits,
     tcg_out_ubfm(s, 0, rd, rn, 0, bits);
 }
 
-static inline void tcg_out_addi(TCGContext *s, int ext,
-                                TCGReg rd, TCGReg rn, unsigned int aimm)
-{
-    /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
-    /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
-    unsigned int base = ext ? 0x91000000 : 0x11000000;
-
-    if (aimm <= 0xfff) {
-        aimm <<= 10;
-    } else {
-        /* we can only shift left by 12, on assert we cannot represent */
-        assert(!(aimm & 0xfff));
-        assert(aimm <= 0xfff000);
-        base |= 1 << 22; /* apply LSL 12 */
-        aimm >>= 2;
-    }
-
-    tcg_out32(s, base | aimm | (rn << 5) | rd);
-}
-
-static inline void tcg_out_subi(TCGContext *s, int ext,
-                                TCGReg rd, TCGReg rn, unsigned int aimm)
-{
-    /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
-    /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
-    unsigned int base = ext ? 0xd1000000 : 0x51000000;
-
-    if (aimm <= 0xfff) {
-        aimm <<= 10;
-    } else {
-        /* we can only shift left by 12, on assert we cannot represent */
-        assert(!(aimm & 0xfff));
-        assert(aimm <= 0xfff000);
-        base |= 1 << 22; /* apply LSL 12 */
-        aimm >>= 2;
-    }
-
-    tcg_out32(s, base | aimm | (rn << 5) | rd);
-}
-
 static inline void tcg_out_nop(TCGContext *s)
 {
-    tcg_out32(s, 0xd503201f);
+    tcg_out32(s, INSN_NOP);
 }
 
 #ifdef CONFIG_SOFTMMU
@@ -1047,44 +1143,47 @@  static uint8_t *tb_ret_addr;
    ret
 */
 
-/* push r1 and r2, and alloc stack space for a total of
-   alloc_n elements (1 element=16 bytes, must be between 1 and 31. */
-static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
-                                     TCGReg r1, TCGReg r2, int alloc_n)
+/* push r1 and r2, and alloc stack space for a total of alloc_npairs
+   (1 pair=16 bytes, must be between 1 and 31). */
+static inline void tcg_out_push_pair(TCGContext *s, TCGReg base,
+                                     TCGReg r1, TCGReg r2,
+                                     unsigned int alloc_npairs)
 {
-    /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
-       | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
-    assert(alloc_n > 0 && alloc_n < 0x20);
-    alloc_n = (-alloc_n) & 0x3f;
-    tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
+    int scaled_offset;
+    assert(alloc_npairs > 0 && alloc_npairs < 0x20);
+    scaled_offset = -(alloc_npairs * 2); /* scaled offset is in 8-byte elements */
+    tcg_out_ldstp(s, 1, LDSTP_ST, LDSTP_PRE, r1, r2, base, scaled_offset);
 }
 
 /* dealloc stack space for a total of alloc_n elements and pop r1, r2.  */
-static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
-                                    TCGReg r1, TCGReg r2, int alloc_n)
+static inline void tcg_out_pop_pair(TCGContext *s, TCGReg base,
+                                    TCGReg r1, TCGReg r2,
+                                    unsigned int alloc_npairs)
 {
-    /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
-       | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
-    assert(alloc_n > 0 && alloc_n < 0x20);
-    tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
+    int scaled_offset;
+    assert(alloc_npairs > 0 && alloc_npairs < 0x20);
+    scaled_offset = (alloc_npairs * 2); /* scaled offset is in 8-byte elements */
+    tcg_out_ldstp(s, 1, LDSTP_LD, LDSTP_POST, r1, r2, base, scaled_offset);
 }
 
-static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
-                                      TCGReg r1, TCGReg r2, int idx)
+static inline void tcg_out_store_pair(TCGContext *s, TCGReg base,
+                                      TCGReg r1, TCGReg r2,
+                                      unsigned int pair_idx)
 {
-    /* using register pair offset simm7 STP 0x29000000 | (ext)
-       | idx << 16 | r2 << 10 | addr << 5 | r1 */
-    assert(idx > 0 && idx < 0x20);
-    tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
+    int scaled_offset;
+    assert(pair_idx > 0 && pair_idx < 0x20);
+    scaled_offset = (pair_idx * 2); /* scaled offset is in 8-byte elements */
+    tcg_out_ldstp(s, 1, LDSTP_ST, LDSTP_NOIDX, r1, r2, base, scaled_offset);
 }
 
-static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
-                                     TCGReg r1, TCGReg r2, int idx)
+static inline void tcg_out_load_pair(TCGContext *s, TCGReg base,
+                                     TCGReg r1, TCGReg r2,
+                                     unsigned int pair_idx)
 {
-    /* using register pair offset simm7 LDP 0x29400000 | (ext)
-       | idx << 16 | r2 << 10 | addr << 5 | r1 */
-    assert(idx > 0 && idx < 0x20);
-    tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
+    int scaled_offset;
+    assert(pair_idx > 0 && pair_idx < 0x20);
+    scaled_offset = (pair_idx * 2); /* scaled offset is in 8-byte elements */
+    tcg_out_ldstp(s, 1, LDSTP_LD, LDSTP_NOIDX, r1, r2, base, scaled_offset);
 }
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
@@ -1193,7 +1292,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_mul_i64:
         ext = 1; /* fall through */
     case INDEX_op_mul_i32:
-        tcg_out_mul(s, ext, args[0], args[1], args[2]);
+        tcg_out_mul(s, MUL_MADD, ext, args[0], args[1], TCG_REG_XZR, args[2]);
         break;
 
     case INDEX_op_shl_i64:
@@ -1202,7 +1301,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (const_args[2]) {    /* LSL / UBFM Wd, Wn, (32 - m) */
             tcg_out_shl(s, ext, args[0], args[1], args[2]);
         } else {                /* LSL / LSLV */
-            tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
+            tcg_out_shift(s, SHIFT_SHL, ext, args[0], args[1], args[2]);
         }
         break;
 
@@ -1212,7 +1311,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (const_args[2]) {    /* LSR / UBFM Wd, Wn, m, 31 */
             tcg_out_shr(s, ext, args[0], args[1], args[2]);
         } else {                /* LSR / LSRV */
-            tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
+            tcg_out_shift(s, SHIFT_SHR, ext, args[0], args[1], args[2]);
         }
         break;
 
@@ -1222,7 +1321,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (const_args[2]) {    /* ASR / SBFM Wd, Wn, m, 31 */
             tcg_out_sar(s, ext, args[0], args[1], args[2]);
         } else {                /* ASR / ASRV */
-            tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
+            tcg_out_shift(s, SHIFT_SAR, ext, args[0], args[1], args[2]);
         }
         break;
 
@@ -1232,7 +1331,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, m */
             tcg_out_rotr(s, ext, args[0], args[1], args[2]);
         } else {                /* ROR / RORV */
-            tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
+            tcg_out_shift(s, SHIFT_ROR, ext, args[0], args[1], args[2]);
         }
         break;
 
@@ -1244,8 +1343,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         } else {
             tcg_out_arith(s, ARITH_SUB, 0,
                           TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
-            tcg_out_shiftrot_reg(s, SRR_ROR, ext,
-                                 args[0], args[1], TCG_REG_TMP);
+            tcg_out_shift(s, SHIFT_ROR, ext, args[0], args[1], TCG_REG_TMP);
         }
         break;
 
@@ -1474,7 +1572,7 @@  static void tcg_target_init(TCGContext *s)
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
     /* NB: frame sizes are in 16 byte stack units! */
-    int frame_size_callee_saved, frame_size_tcg_locals;
+    unsigned int frame_size_callee_saved, frame_size_tcg_locals;
     TCGReg r;
 
     /* save pairs             (FP, LR) and (X19, X20) .. (X27, X28) */