diff mbox

[1/9] target-arm: A64: add support for stp (store pair)

Message ID 1386612744-1013-2-git-send-email-peter.maydell@linaro.org
State New
Headers show

Commit Message

Peter Maydell Dec. 9, 2013, 6:12 p.m. UTC
From: Alex Bennée <alex.bennee@linaro.org>

This patch supports the basic store-pair instructions and includes
the generic store helper functions:

  * do_gpr_st()
  * do_fp_st()
  * gen_check_sp_alignment()

The last function, gen_check_sp_alignment(), is currently a no-op but
is put in place to make it easy to add SP alignment checking later.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/translate-a64.c | 220 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 218 insertions(+), 2 deletions(-)

Comments

Richard Henderson Dec. 9, 2013, 8:17 p.m. UTC | #1
On 12/09/2013 10:12 AM, Peter Maydell wrote:
> +static void do_gpr_st(DisasContext *s, TCGv_i64 source,
> +                      TCGv_i64 tcg_addr, int size)
> +{
> +    switch (size) {
> +    case 0:
> +        tcg_gen_qemu_st8(source, tcg_addr, get_mem_index(s));
> +        break;
> +    case 1:
> +        tcg_gen_qemu_st16(source, tcg_addr, get_mem_index(s));
> +        break;
> +    case 2:
> +        tcg_gen_qemu_st32(source, tcg_addr, get_mem_index(s));
> +        break;
> +    case 3:
> +        tcg_gen_qemu_st64(source, tcg_addr, get_mem_index(s));
> +        break;

Please use the new ldst entry points.  In this case,

  tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size)

since size is already log2.

> +static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
> +{
> +    /* This writes the bottom N bits of a 128 bit wide vector to memory */
> +    int freg_offs = offsetof(CPUARMState, vfp.regs[srcidx * 2]);
> +    TCGv_i64 tmp = tcg_temp_new_i64();
> +
> +    switch (size) {
> +    case 0:
> +        tcg_gen_ld8u_i64(tmp, cpu_env, freg_offs);
> +        tcg_gen_qemu_st8(tmp, tcg_addr, get_mem_index(s));
> +        break;
> +    case 1:
> +        tcg_gen_ld16u_i64(tmp, cpu_env, freg_offs);
> +        tcg_gen_qemu_st16(tmp, tcg_addr, get_mem_index(s));
> +        break;
> +    case 2:
> +        tcg_gen_ld32u_i64(tmp, cpu_env, freg_offs);
> +        tcg_gen_qemu_st32(tmp, tcg_addr, get_mem_index(s));
> +        break;
> +    case 3:
> +        tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
> +        tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
> +        break;
> +    case 4:
> +    {
> +        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
> +        tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
> +        tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
> +        tcg_gen_ld_i64(tmp, cpu_env, freg_offs + sizeof(float64));
> +        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
> +        tcg_gen_qemu_st64(tmp, tcg_hiaddr, get_mem_index(s));
> +        tcg_temp_free_i64(tcg_hiaddr);
> +        break;
> +    }

You'll certainly have to continue to special-case the 128-bit store, but the
other sizes need not be.

> +/*
> + * C5.6.177 STP (Store Pair - non vector)
> + * C6.3.284 STP (Store Pair of SIMD&FP)
> + *
> + *  31 30 29   27 26  25   23 22 21   15 14   10 9    5 4    0
> + * +-----+-------+---+-------+--+-----------------------------+
> + * | opc | 1 0 1 | V | index | 0|  imm7 |  Rt2  |  Rn  | Rt   |
> + * +-----+-------+---+-------+--+-------+-------+------+------+
> + *
> + * opc: STP           00 -> 32 bit, 10 -> 64 bit
> + *      STP (SIMD&FP) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
> + * idx: 001 -> post-index, 011 -> pre-index, 010 -> signed off
> + *
> + * Rt, Rt2 = GPR or SIMD registers to be stored
> + * Rn = general purpose register containing address
> + * imm7 = signed offset (multiple of 4 or 8 depending on size)
> + */
> +static void handle_stp(DisasContext *s, uint32_t insn)
> +{
> +    int rt = extract32(insn, 0, 5);
> +    int rn = extract32(insn, 5, 5);
> +    int rt2 = extract32(insn, 10, 5);
> +    int64_t offset = sextract32(insn, 15, 7);
> +    int type = extract32(insn, 23, 2);
> +    bool is_vector = extract32(insn, 26, 1);
> +    int opc = extract32(insn, 30, 2);
> +
> +    TCGv_i64 tcg_rt = cpu_reg(s, rt);
> +    TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);

If you're going to combine vector and non-vector STP, then I think it would be
cleaner if you didn't load these registers so early, when we're not even sure
if we're talking about general registers.

> +    if (wback) {
> +        if (postindex) {
> +            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
> +        } else {
> +            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
> +        }

Perhaps better as

    tcg_gen_addi_i64(tcg_addr, tcg_addr,
                     (postindex ? offset : 0) - (1 << size));


?


r~
Alex Bennée Dec. 10, 2013, 2:05 p.m. UTC | #2
rth@twiddle.net writes:

> On 12/09/2013 10:12 AM, Peter Maydell wrote:
<snip>
>> +    if (wback) {
>> +        if (postindex) {
>> +            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
>> +        } else {
>> +            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
>> +        }
>
> Perhaps better as
>
>     tcg_gen_addi_i64(tcg_addr, tcg_addr,
>                      (postindex ? offset : 0) - (1 << size));

I'm not so sure it's easier to follow, despite my general proclivity
for the ternary operator.

>
>
> ?
>
>
> r~
diff mbox

Patch

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 0a76130..56f2d6b 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -99,6 +99,16 @@  void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
     cpu_fprintf(f, "\n");
 }
 
+
+static int get_mem_index(DisasContext *s)
+{
+#ifdef CONFIG_USER_ONLY
+    return 1;
+#else
+    return s->user;
+#endif
+}
+
 void gen_a64_set_pc_im(uint64_t val)
 {
     tcg_gen_movi_i64(cpu_pc, val);
@@ -278,6 +288,94 @@  static inline void gen_logic_CC(int sf, TCGv_i64 result)
 }
 
 /*
+ * Load/Store generators
+ */
+
+/*
+  Store from GPR register to memory
+*/
+static void do_gpr_st(DisasContext *s, TCGv_i64 source,
+                      TCGv_i64 tcg_addr, int size)
+{
+    switch (size) {
+    case 0:
+        tcg_gen_qemu_st8(source, tcg_addr, get_mem_index(s));
+        break;
+    case 1:
+        tcg_gen_qemu_st16(source, tcg_addr, get_mem_index(s));
+        break;
+    case 2:
+        tcg_gen_qemu_st32(source, tcg_addr, get_mem_index(s));
+        break;
+    case 3:
+        tcg_gen_qemu_st64(source, tcg_addr, get_mem_index(s));
+        break;
+    default:
+        /* Bad size */
+        g_assert(false);
+        break;
+    }
+}
+
+/*
+ * Store from FP register to memory
+ */
+static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
+{
+    /* This writes the bottom N bits of a 128 bit wide vector to memory */
+    int freg_offs = offsetof(CPUARMState, vfp.regs[srcidx * 2]);
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    switch (size) {
+    case 0:
+        tcg_gen_ld8u_i64(tmp, cpu_env, freg_offs);
+        tcg_gen_qemu_st8(tmp, tcg_addr, get_mem_index(s));
+        break;
+    case 1:
+        tcg_gen_ld16u_i64(tmp, cpu_env, freg_offs);
+        tcg_gen_qemu_st16(tmp, tcg_addr, get_mem_index(s));
+        break;
+    case 2:
+        tcg_gen_ld32u_i64(tmp, cpu_env, freg_offs);
+        tcg_gen_qemu_st32(tmp, tcg_addr, get_mem_index(s));
+        break;
+    case 3:
+        tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
+        tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
+        break;
+    case 4:
+    {
+        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
+        tcg_gen_ld_i64(tmp, cpu_env, freg_offs);
+        tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
+        tcg_gen_ld_i64(tmp, cpu_env, freg_offs + sizeof(float64));
+        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
+        tcg_gen_qemu_st64(tmp, tcg_hiaddr, get_mem_index(s));
+        tcg_temp_free_i64(tcg_hiaddr);
+        break;
+    }
+    default:
+        g_assert(false);
+        break;
+    }
+
+    tcg_temp_free_i64(tmp);
+}
+
+static inline void gen_check_sp_alignment(DisasContext *s)
+{
+    /* The AArch64 architecture mandates that (if enabled via PSTATE
+     * or SCTLR bits) there is a check that SP is 16-aligned on every
+     * SP-relative load or store (with an exception generated if it is not).
+     * In line with general QEMU practice regarding misaligned accesses,
+     * we omit these checks for the sake of guest program performance.
+     * This function is provided as a hook so we can more easily add these
+     * checks in future (possibly as a "favour catching guest program bugs
+     * over speed" user selectable option).
+     */
+}
+
+/*
  * the instruction disassembly implemented here matches
  * the instruction encoding classifications in chapter 3 (C3)
  * of the ARM Architecture Reference Manual (DDI0487A_a)
@@ -620,10 +718,128 @@  static void disas_ld_lit(DisasContext *s, uint32_t insn)
     unsupported_encoding(s, insn);
 }
 
-/* Load/store pair (all forms) */
+/*
+ * C5.6.177 STP (Store Pair - non vector)
+ * C6.3.284 STP (Store Pair of SIMD&FP)
+ *
+ *  31 30 29   27 26  25   23 22 21   15 14   10 9    5 4    0
+ * +-----+-------+---+-------+--+-----------------------------+
+ * | opc | 1 0 1 | V | index | 0|  imm7 |  Rt2  |  Rn  | Rt   |
+ * +-----+-------+---+-------+--+-------+-------+------+------+
+ *
+ * opc: STP           00 -> 32 bit, 10 -> 64 bit
+ *      STP (SIMD&FP) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
+ * idx: 001 -> post-index, 011 -> pre-index, 010 -> signed off
+ *
+ * Rt, Rt2 = GPR or SIMD registers to be stored
+ * Rn = general purpose register containing address
+ * imm7 = signed offset (multiple of 4 or 8 depending on size)
+ */
+static void handle_stp(DisasContext *s, uint32_t insn)
+{
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int rt2 = extract32(insn, 10, 5);
+    int64_t offset = sextract32(insn, 15, 7);
+    int type = extract32(insn, 23, 2);
+    bool is_vector = extract32(insn, 26, 1);
+    int opc = extract32(insn, 30, 2);
+
+    TCGv_i64 tcg_rt = cpu_reg(s, rt);
+    TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
+    TCGv_i64 tcg_addr; /* calculated address */
+    bool postindex = false;
+    bool wback = false;
+    int size;
+
+    if (is_vector) {
+        if (opc == 3) {
+            unallocated_encoding(s);
+            return;
+        }
+        size = 2 + opc;
+    } else {
+        size = 2 + extract32(opc, 1, 1);
+        if (opc & 1) {
+            unallocated_encoding(s);
+            return;
+        }
+    }
+
+    switch (type) {
+    case 1: /* STP (post-index) */
+        postindex = true;
+        wback = true;
+        break;
+    case 2: /* STP (signed offset), rn not updated */
+        postindex = false;
+        break;
+    case 3: /* STP (pre-index) */
+        postindex = false;
+        wback = true;
+        break;
+    default: /* Failed decoder tree? */
+        unallocated_encoding(s);
+        break;
+    }
+
+    offset <<= size;
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+
+    tcg_addr = tcg_temp_new_i64();
+    tcg_gen_mov_i64(tcg_addr, cpu_reg_sp(s, rn));
+
+    if (!postindex) {
+        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+    }
+
+    if (is_vector) {
+        do_fp_st(s, rt, tcg_addr, size);
+    } else {
+        do_gpr_st(s, tcg_rt, tcg_addr, size);
+    }
+    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
+    if (is_vector) {
+        do_fp_st(s, rt2, tcg_addr, size);
+    } else {
+        do_gpr_st(s, tcg_rt2, tcg_addr, size);
+    }
+
+    if (wback) {
+        if (postindex) {
+            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
+        } else {
+            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
+        }
+        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
+    }
+
+    tcg_temp_free_i64(tcg_addr);
+}
+
+
+/* C2.2.3 Load/store pair (all non vector forms)
+ *
+ *  31 30 29   27 26  25   23  22 21   15 14   10 9    5 4    0
+ * +-----+-------+---+-------+---+-----------------------------+
+ * | opc | 1 0 1 | V | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
+ * +-----+-------+---+-------+---+-------+-------+------+------+
+ *
+ * L = 0 -> Store, 1 -> Load
+ * V = 0 -> non-vector, 1 -> vector (SIMD & FP)
+ */
 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int is_load = extract32(insn, 22, 1);
+
+    if (is_load) {
+        unsupported_encoding(s, insn);
+    } else {
+        handle_stp(s, insn);
+    }
 }
 
 /* Load/store register (all forms) */