diff mbox

[15/60] AArch64: Add add instruction family emulation

Message ID 1380242934-20953-16-git-send-email-agraf@suse.de
State New
Headers show

Commit Message

Alexander Graf Sept. 27, 2013, 12:48 a.m. UTC
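This patch adds emulation of the AArch64 add/sub instruction family: the
shifted- and extended-register forms, their flag-setting variants, and
add/subtract with carry.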

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-arm/helper-a64.c    |  85 +++++++++++++++++++++++++
 target-arm/helper-a64.h    |   3 +
 target-arm/translate-a64.c | 150 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 238 insertions(+)

Comments

Richard Henderson Sept. 27, 2013, 6:51 p.m. UTC | #1
On 09/26/2013 05:48 PM, Alexander Graf wrote:
> +    tcg_gen_mov_i64(tcg_src, cpu_reg(source));
> +    tcg_dst = cpu_reg(dest);
> +    if (extend) {
> +        if ((shift_amount & 0x7) > 4) {
> +            /* reserved value */
> +            unallocated_encoding(s);
> +        }
> +        if (!setflags) {
> +            tcg_gen_mov_i64(tcg_src, cpu_reg_sp(source));
> +            tcg_dst = cpu_reg_sp(dest);
> +        }
> +    } else {
> +        if (shift_type == 3) {
> +            /* reserved value */
> +            unallocated_encoding(s);
> +        }
> +        if (is_32bit && (shift_amount < 0)) {
> +            /* reserved value */
> +            unallocated_encoding(s);
> +        }
> +    }

You'd do better to load up the source and destination TCGv values in that IF
sequence, and emit one tcg_gen_mov_i64 afterward.

At the moment you're emitting two for the extend && !setflags case.

> +    if (extend) {
> +        tcg_op2 = tcg_temp_new_i64();
> +        reg_extend(tcg_op2, shift_amount >> 3, shift_amount & 0x7, rm);
> +    } else {
> +        tcg_op2 = get_shifti(rm, shift_type, shift_amount, is_32bit);
> +    }

Why does get_shifti return a temp, but reg_extend requires one to be passed in?

> +    if (is_32bit) {
> +        tcg_gen_ext32s_i64(tcg_src, tcg_src);
> +        tcg_gen_ext32s_i64(tcg_op2, tcg_op2);
> +    }

Why?  You'll zero-extend the result, and the flags setting will truncate the
inputs itself.

> +    if (sub_op) {
> +        tcg_gen_sub_i64(tcg_result, tcg_src, tcg_op2);
> +    } else {
> +        tcg_gen_add_i64(tcg_result, tcg_src, tcg_op2);
> +    }
> +
> +    if (is_carry) {
> +        TCGv_i64 tcg_carry = tcg_temp_new_i64();
> +        tcg_gen_shri_i64(tcg_carry, pstate, PSTATE_C_SHIFT);
> +        tcg_gen_andi_i64(tcg_carry, tcg_carry, 1);
> +        tcg_gen_add_i64(tcg_result, tcg_result, tcg_carry);
> +        if (sub_op) {
> +            tcg_gen_subi_i64(tcg_result, tcg_result, 1);
> +        }
> +        tcg_temp_free_i64(tcg_carry);
> +    }

For sub_op && is_carry, it's probably better to do exactly what the manual
says, rd = rn + ~rm + C, as opposed to rd = rn - rm + C - 1 as you do here.

This will be especially true if you eventually split up the flags as is done on
the A32 side.  One can compute rd plus the new carry via add2.


r~
diff mbox

Patch

diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index da72b7f..2400b6e 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -52,3 +52,88 @@  uint32_t HELPER(pstate_add)(uint32_t pstate, uint64_t a1, uint64_t a2,
 
     return pstate;
 }
+
+/*
+ * Return pstate with N, Z, C and V recomputed for the 32-bit addition
+ * x1 + x2 (+ carry-in) = xr; only the low 32 bits of each are examined.
+ */
+uint32_t HELPER(pstate_add32)(uint32_t pstate, uint64_t x1, uint64_t x2,
+                              uint64_t xr)
+{
+    uint32_t a1 = x1;
+    uint32_t a2 = x2;
+    uint32_t ar = xr;
+
+    pstate &= ~(PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V);
+
+    if (ar & 0x80000000U) {
+        pstate |= PSTATE_N;
+    }
+    if (!ar) {
+        pstate |= PSTATE_Z;
+    }
+
+    /* Carry out of bit 31; also right when the sum wraps to exactly 0. */
+    if (((a1 & a2) | ((a1 | a2) & ~ar)) >> 31) {
+        pstate |= PSTATE_C;
+    }
+
+    /* Overflow: both operands share a sign that the result does not. */
+    if ((~(a1 ^ a2) & (a1 ^ ar)) >> 31) {
+        pstate |= PSTATE_V;
+    }
+
+    return pstate;
+}
+
+/*
+ * Return pstate with N, Z, C and V recomputed for the 64-bit subtraction
+ * a1 - a2 = ar, where ar may include a borrow: ar = a1 + ~a2 + carry-in.
+ */
+uint32_t HELPER(pstate_sub)(uint32_t pstate, uint64_t a1, uint64_t a2,
+                            uint64_t ar)
+{
+    /* Reuse the add helper for N and Z; its C and V are replaced below. */
+    pstate = helper_pstate_add(pstate, a1, a2, ar);
+
+    pstate &= ~(PSTATE_C | PSTATE_V);
+
+    /* C is the carry out of bit 63 of a1 + ~a2 + carry-in (set: no borrow). */
+    if (((a1 & ~a2) | ((a1 | ~a2) & ~ar)) >> 63) {
+        pstate |= PSTATE_C;
+    }
+
+    /* V: the operands differ in sign and the result's sign is not a1's. */
+    if (((a1 ^ a2) & (a1 ^ ar)) >> 63) {
+        pstate |= PSTATE_V;
+    }
+
+    return pstate;
+}
+
+/*
+ * Return pstate with N, Z, C and V recomputed for the 32-bit subtraction
+ * x1 - x2 = xr, looking only at the low 32 bits of each argument. xr may
+ * include a borrow: xr = x1 + ~x2 + carry-in.
+ */
+uint32_t HELPER(pstate_sub32)(uint32_t pstate, uint64_t x1, uint64_t x2,
+                              uint64_t xr)
+{
+    uint32_t a1 = x1;
+    uint32_t a2 = x2;
+    uint32_t ar = xr;
+
+    /* Reuse the add helper for N and Z; its C and V are replaced below. */
+    pstate = helper_pstate_add32(pstate, a1, a2, ar);
+    pstate &= ~(PSTATE_C | PSTATE_V);
+
+    /* C is the carry out of bit 31 of a1 + ~a2 + carry-in (set: no borrow). */
+    if (((a1 & ~a2) | ((a1 | ~a2) & ~ar)) >> 31) {
+        pstate |= PSTATE_C;
+    }
+    /* V: the operands differ in sign and the result's sign is not a1's. */
+    if (((a1 ^ a2) & (a1 ^ ar)) >> 31) {
+        pstate |= PSTATE_V;
+    }
+
+    return pstate;
+}
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 1492b15..4deab64 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -18,3 +18,6 @@ 
  */
 
 DEF_HELPER_FLAGS_4(pstate_add, TCG_CALL_NO_RWG_SE, i32, i32, i64, i64, i64)
+DEF_HELPER_FLAGS_4(pstate_add32, TCG_CALL_NO_RWG_SE, i32, i32, i64, i64, i64)
+DEF_HELPER_FLAGS_4(pstate_sub, TCG_CALL_NO_RWG_SE, i32, i32, i64, i64, i64)
+DEF_HELPER_FLAGS_4(pstate_sub32, TCG_CALL_NO_RWG_SE, i32, i32, i64, i64, i64)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 2a80715..a0df55c 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -601,6 +601,153 @@  static void handle_orr(DisasContext *s, uint32_t insn)
     tcg_temp_free_i64(tcg_op2);
 }
 
+/*
+ * Emit the pstate helper call that matches the operation (add or sub) and
+ * the operand width, with pstate as both destination and first argument.
+ */
+static void setflags_add(bool sub_op, bool is_32bit, TCGv_i64 src,
+                         TCGv_i64 op2, TCGv_i64 res)
+{
+    if (sub_op && is_32bit) {
+        gen_helper_pstate_sub32(pstate, pstate, src, op2, res);
+    } else if (sub_op) {
+        gen_helper_pstate_sub(pstate, pstate, src, op2, res);
+    } else if (is_32bit) {
+        gen_helper_pstate_add32(pstate, pstate, src, op2, res);
+    } else {
+        gen_helper_pstate_add(pstate, pstate, src, op2, res);
+    }
+}
+
+/*
+ * Write into tcg_offset the value of register 'reg', extended as selected
+ * by 'option' (bits [1:0]: source width 8/16/32/64 bits, bit 2: signed)
+ * and then shifted left by 'shift' bits.
+ */
+static void reg_extend(TCGv_i64 tcg_offset, int option, int shift, int reg)
+{
+    bool sign_extend = get_bits(option, 2, 1);
+
+    switch (get_bits(option, 0, 2)) {
+    case 0:
+        if (sign_extend) {
+            tcg_gen_ext8s_i64(tcg_offset, cpu_reg(reg));
+        } else {
+            tcg_gen_ext8u_i64(tcg_offset, cpu_reg(reg));
+        }
+        break;
+    case 1:
+        if (sign_extend) {
+            tcg_gen_ext16s_i64(tcg_offset, cpu_reg(reg));
+        } else {
+            tcg_gen_ext16u_i64(tcg_offset, cpu_reg(reg));
+        }
+        break;
+    case 2:
+        if (sign_extend) {
+            tcg_gen_ext32s_i64(tcg_offset, cpu_reg(reg));
+        } else {
+            tcg_gen_ext32u_i64(tcg_offset, cpu_reg(reg));
+        }
+        break;
+    case 3:
+        /* Full 64-bit source: signedness makes no difference, just copy. */
+        tcg_gen_mov_i64(tcg_offset, cpu_reg(reg));
+        break;
+    }
+
+    if (shift != 0) {
+        tcg_gen_shli_i64(tcg_offset, tcg_offset, shift);
+    }
+}
+
+/*
+ * Translate the add/sub family (shifted- and extended-register forms and
+ * the with-carry variants); reserved encodings are rejected up front.
+ */
+static void handle_add(DisasContext *s, uint32_t insn)
+{
+    int dest = get_reg(insn);
+    int source = get_bits(insn, 5, 5);
+    /* imm6 is unsigned: a 64-bit operand may be shifted by up to 63. */
+    int shift_amount = get_bits(insn, 10, 6);
+    int rm = get_bits(insn, 16, 5);
+    bool extend = get_bits(insn, 21, 1);
+    int shift_type = get_bits(insn, 22, 2);
+    bool is_carry = (get_bits(insn, 24, 5) == 0x1a);
+    bool setflags = get_bits(insn, 29, 1);
+    bool sub_op = get_bits(insn, 30, 1);
+    bool is_32bit = !get_bits(insn, 31, 1);
+    TCGv_i64 tcg_op2;
+    TCGv_i64 tcg_src;
+    TCGv_i64 tcg_dst;
+    TCGv_i64 tcg_result;
+    bool reserved;
+
+    if (extend) {
+        /* Extended form: bits [23:22] must be zero, left shift at most 4. */
+        reserved = shift_type || (shift_amount & 0x7) > 4;
+    } else {
+        /* Shifted form: type 3 is reserved, as are 32-bit shifts >= 32. */
+        reserved = shift_type == 3 || (is_32bit && (shift_amount & 0x20));
+    }
+    if (reserved) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_src = tcg_temp_new_i64();
+    tcg_result = tcg_temp_new_i64();
+    if (extend) {
+        /* Rn=31 always means SP; Rd=31 means SP unless flags are set. */
+        tcg_gen_mov_i64(tcg_src, cpu_reg_sp(source));
+        tcg_dst = setflags ? cpu_reg(dest) : cpu_reg_sp(dest);
+        tcg_op2 = tcg_temp_new_i64();
+        reg_extend(tcg_op2, shift_amount >> 3, shift_amount & 0x7, rm);
+    } else {
+        tcg_gen_mov_i64(tcg_src, cpu_reg(source));
+        tcg_dst = cpu_reg(dest);
+        tcg_op2 = get_shifti(rm, shift_type, shift_amount, is_32bit);
+    }
+
+    if (is_32bit) {
+        tcg_gen_ext32s_i64(tcg_src, tcg_src);
+        tcg_gen_ext32s_i64(tcg_op2, tcg_op2);
+    }
+
+    if (sub_op) {
+        tcg_gen_sub_i64(tcg_result, tcg_src, tcg_op2);
+    } else {
+        tcg_gen_add_i64(tcg_result, tcg_src, tcg_op2);
+    }
+
+    if (is_carry) {
+        /* ADC: rn + rm + C.  SBC: rn + ~rm + C == rn - rm + C - 1. */
+        TCGv_i64 tcg_carry = tcg_temp_new_i64();
+        tcg_gen_shri_i64(tcg_carry, pstate, PSTATE_C_SHIFT);
+        tcg_gen_andi_i64(tcg_carry, tcg_carry, 1);
+        tcg_gen_add_i64(tcg_result, tcg_result, tcg_carry);
+        if (sub_op) {
+            tcg_gen_subi_i64(tcg_result, tcg_result, 1);
+        }
+        tcg_temp_free_i64(tcg_carry);
+    }
+
+    if (setflags) {
+        setflags_add(sub_op, is_32bit, tcg_src, tcg_op2, tcg_result);
+    }
+
+    if (is_32bit) {
+        tcg_gen_ext32u_i64(tcg_dst, tcg_result);
+    } else {
+        tcg_gen_mov_i64(tcg_dst, tcg_result);
+    }
+
+    tcg_temp_free_i64(tcg_src);
+    tcg_temp_free_i64(tcg_op2);
+    tcg_temp_free_i64(tcg_result);
+}
+
 void disas_a64_insn(CPUARMState *env, DisasContext *s)
 {
     uint32_t insn;
@@ -636,6 +783,9 @@  void disas_a64_insn(CPUARMState *env, DisasContext *s)
     case 0x0a:
         handle_orr(s, insn);
         break;
+    case 0x0b:
+        handle_add(s, insn);
+        break;
     case 0x0c:
         if (get_bits(insn, 29, 1)) {
             handle_stp(s, insn);