[4/9] target-arm: A64: add support for ld/st with reg offset

Message ID 1386612744-1013-5-git-send-email-peter.maydell@linaro.org
State New

Commit Message

Peter Maydell Dec. 9, 2013, 6:12 p.m. UTC
From: Alex Bennée <alex.bennee@linaro.org>

This adds support for the load/store forms using a register offset.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/translate-a64.c | 141 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 1 deletion(-)
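
As a concrete illustration of the form being added, here is a small
stand-alone sketch (not part of the patch itself: the instruction word
below was assembled by hand, and ext_and_shift() merely models the ARM
ARM's DecodeRegExtend() plus the optional scale that the patch's
ext_and_shift_reg() emits TCG ops for):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* cut-down version of QEMU's extract32() helper */
static uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

/* plain-C model of DecodeRegExtend() followed by the optional scale */
static uint64_t ext_and_shift(uint64_t rm, int option, unsigned int shift)
{
    int len = 8 << (option & 3);             /* extsize: 8/16/32/64 bits */
    uint64_t v = len == 64 ? rm : rm & ((1ULL << len) - 1);

    if (option & 4) {                        /* option<2>: sign extend */
        uint64_t sign = 1ULL << (len - 1);
        v = (v ^ sign) - sign;
    }
    return v << shift;
}

int main(void)
{
    uint32_t insn = 0xf862f820;              /* ldr x0, [x1, x2, sxtx #3] */
    int size = extract32(insn, 30, 2);
    int opt = extract32(insn, 13, 3);
    int s = extract32(insn, 12, 1);

    printf("size=%d opc=%d Rm=%d opt=%d S=%d Rn=%d Rt=%d\n",
           size, extract32(insn, 22, 2), extract32(insn, 16, 5),
           opt, s, extract32(insn, 5, 5), extract32(insn, 0, 5));
    /* prints: size=3 opc=1 Rm=2 opt=7 S=1 Rn=1 Rt=0 */

    /* with x2 = 5 the effective offset is 5 << 3 = 40 */
    printf("offset=%" PRIu64 "\n", ext_and_shift(5, opt, s ? size : 0));
    return 0;
}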

Comments

Richard Henderson Dec. 9, 2013, 9:09 p.m. UTC | #1
On 12/09/2013 10:12 AM, Peter Maydell wrote:
> From: Alex Bennée <alex.bennee@linaro.org>
> 
> This adds support for the load/store forms using a register offset.
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  target-arm/translate-a64.c | 141 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 140 insertions(+), 1 deletion(-)
> 
<snip>
> +/*
> + * C3.3.10 Load/store (register offset)
> + *
> + * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
> + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
> + * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
> + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
> + *
> + * For non-vector:
> + *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
> + *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
> + * For vector:
> + *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
> + *   opc<0>: 0 -> store, 1 -> load
> + * V: 1 -> vector/simd
> + * opt: extend encoding (see DecodeRegExtend)
> + * S: if S=1 then scale (essentially index by sizeof(size))
> + * Rt: register to transfer into/out of
> + * Rn: address register or SP for base
> + * Rm: offset register or ZR for offset
> + */
> +static void handle_ldst_reg_roffset(DisasContext *s, uint32_t insn)
> +{
> +    int rt = extract32(insn, 0, 5);
> +    int rn = extract32(insn, 5, 5);
> +    int shift = extract32(insn, 12, 1);
> +    int rm = extract32(insn, 16, 5);
> +    int opc = extract32(insn, 22, 2);
> +    int opt = extract32(insn, 13, 3);
> +    int size = extract32(insn, 30, 2);
> +    bool is_signed = false;
> +    bool is_store = false;
> +    bool is_extended = false;
> +    bool is_vector = extract32(insn, 26, 1);
> +
> +    TCGv_i64 tcg_rm;
> +    TCGv_i64 tcg_addr;
> +
> +    if (extract32(opt, 1, 1) == 0) {
> +        unallocated_encoding(s);
> +        return;
> +    }
> +
> +    if (is_vector) {
> +        size |= (opc & 2) << 1;
> +        if (size > 4) {
> +            unallocated_encoding(s);
> +        }
> +        is_store = ((opc & 1) == 0);
> +    } else {
> +        if (size == 3 && opc == 2) {
> +            /* PRFM - prefetch */
> +            return;
> +        }
> +        is_store = (opc == 0);
> +        is_signed = opc & (1<<1);
> +        is_extended = (size < 3) && (opc & 1);
> +    }
> +
> +    if (rn == 31) {
> +        gen_check_sp_alignment(s);
> +    }
> +    tcg_addr = read_cpu_reg_sp(s, rn, 1);
> +
> +    tcg_rm = read_cpu_reg(s, rm, 1);
> +    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
> +
> +    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
> +
> +    if (is_vector) {
> +        if (is_store) {
> +            do_fp_st(s, rt, tcg_addr, size);
> +        } else {
> +            do_fp_ld(s, rt, tcg_addr, size);
> +        }
> +    } else {
> +        TCGv_i64 tcg_rt = cpu_reg(s, rt);
> +        if (is_store) {
> +            do_gpr_st(s, tcg_rt, tcg_addr, size);
> +        } else {
> +            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
> +        }
> +    }
> +}

I wonder if it would be better to merge this function with the one from the
previous patch.  There are only 3-4 lines that are different.


r~
Alex Bennée Dec. 10, 2013, 2:16 p.m. UTC | #2
rth@twiddle.net writes:

> On 12/09/2013 10:12 AM, Peter Maydell wrote:
>> From: Alex Bennée <alex.bennee@linaro.org>
>> 
>> This adds support for the load/store forms using a register offset.
>> 
<snip>
>
> I wonder if it would be better to merge this function with the one from the
> previous patch.  There are only 3-4 lines that are different.

I was trying to avoid having it jump through special cases when decoding
the instruction, where Rm, opt and S clash with the imm field in the other
form. But certainly there is an argument for sharing the rest of the
common code (the size/opc decoding and the final tcg_addr based load/store).
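
To make the clash concrete, the two layouts line up like this (same
diagram style as the patch; C3.3.13 is the form handled by the previous
patch):

C3.3.10 load/store (register offset):

 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
 +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
 |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
 +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+

C3.3.13 load/store (unsigned immediate):

 31 30 29   27  26 25 24 23 22 21                     10 9  5 4  0
 +----+-------+---+-----+-----+-------------------------+----+----+
 |size| 1 1 1 | V | 0 1 | opc |          imm12          | Rn | Rt |
 +----+-------+---+-----+-----+-------------------------+----+----+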

However my preference unless there is a strong objection would be to
clean that up in later patches. For one thing the more instructions each
patch handles the longer it takes to run the instruction validation on
the rather slow models to have good coverage of the decoder!

>
>
> r~
Richard Henderson Dec. 10, 2013, 3:59 p.m. UTC | #3
On 12/10/2013 06:16 AM, Alex Bennée wrote:
> However my preference unless there is a strong objection would be to
> clean that up in later patches. For one thing the more instructions each
> patch handles the longer it takes to run the instruction validation on
> the rather slow models to have good coverage of the decoder!

That'd be ok by me.


r~
Alex Bennée Dec. 11, 2013, 10:01 p.m. UTC | #4
rth@twiddle.net writes:

> On 12/10/2013 06:16 AM, Alex Bennée wrote:
>> However my preference unless there is a strong objection would be to
>> clean that up in later patches. For one thing the more instructions each
>> patch handles the longer it takes to run the instruction validation on
>> the rather slow models to have good coverage of the decoder!
>
> That'd be ok by me.

I had a play trying to see what pulling out the common parts of the
decode and unallocated-encoding handling, based on the ARM ARM
pseudo-code, into a separate function would look like. Unfortunately
what I ended up with was a horrible function full of pass-by-reference
parameters, and the call sites in each handler were not particularly
cleaner or shorter.
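
Roughly, the shape was something like this (an illustrative
reconstruction from memory -- not the actual code, and the name is
made up):

static bool decode_ldst_common(DisasContext *s, uint32_t insn,
                               int *size, bool *is_vector, bool *is_store,
                               bool *is_signed, bool *is_extended)
{
    /* illustrative sketch only -- shared size/opc decode for the
     * register-offset and unsigned-immediate handlers */
    int opc = extract32(insn, 22, 2);

    *size = extract32(insn, 30, 2);
    *is_vector = extract32(insn, 26, 1);
    *is_signed = false;
    *is_extended = false;
    *is_store = false;

    if (*is_vector) {
        *size |= (opc & 2) << 1;
        if (*size > 4) {
            unallocated_encoding(s);
            return false;
        }
        *is_store = ((opc & 1) == 0);
    } else {
        if (*size == 3 && opc == 2) {
            /* PRFM: nothing for the caller to do */
            return false;
        }
        *is_store = (opc == 0);
        *is_signed = opc & (1 << 1);
        *is_extended = (*size < 3) && (opc & 1);
    }
    return true;
}

Five out-parameters before you even reach the addressing-mode bits,
which are where the two forms actually differ.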

I did briefly consider whether I could construct a macro which would make
for less duplicated code, but I suspect that won't help in the long run.

This is certainly something I think requires more thought. In the
meantime I've addressed your other review comments and Peter should be
pushing a new set of patches soon.


Cheers,

--
Alex Bennée
QEMU/KVM Hacker for Linaro

Patch

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index ea3abc3..64f98a7 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -467,6 +467,54 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
     tcg_temp_free_i64(tmphi);
 }
 
+/*
+ * This utility function is for doing register extension with an
+ * optional shift. You will likely want to pass a temporary for the
+ * destination register. See DecodeRegExtend() in the ARM ARM.
+ */
+static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
+                              int option, unsigned int shift)
+{
+    int extsize = extract32(option, 0, 2);
+    bool is_signed = extract32(option, 2, 1);
+
+    if (is_signed) {
+        switch (extsize) {
+        case 0:
+            tcg_gen_ext8s_i64(tcg_out, tcg_in);
+            break;
+        case 1:
+            tcg_gen_ext16s_i64(tcg_out, tcg_in);
+            break;
+        case 2:
+            tcg_gen_ext32s_i64(tcg_out, tcg_in);
+            break;
+        case 3:
+            tcg_gen_mov_i64(tcg_out, tcg_in);
+            break;
+        }
+    } else {
+        switch (extsize) {
+        case 0:
+            tcg_gen_ext8u_i64(tcg_out, tcg_in);
+            break;
+        case 1:
+            tcg_gen_ext16u_i64(tcg_out, tcg_in);
+            break;
+        case 2:
+            tcg_gen_ext32u_i64(tcg_out, tcg_in);
+            break;
+        case 3:
+            tcg_gen_mov_i64(tcg_out, tcg_in);
+            break;
+        }
+    }
+
+    if (shift) {
+        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
+    }
+}
+
 static inline void gen_check_sp_alignment(DisasContext *s)
 {
     /* The AArch64 architecture mandates that (if enabled via PSTATE
@@ -1048,6 +1096,93 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     }
 }
 
+
+
+/*
+ * C3.3.10 Load/store (register offset)
+ *
+ * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
+ * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
+ * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
+ * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
+ *
+ * For non-vector:
+ *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
+ *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
+ * For vector:
+ *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
+ *   opc<0>: 0 -> store, 1 -> load
+ * V: 1 -> vector/simd
+ * opt: extend encoding (see DecodeRegExtend)
+ * S: if S=1 then scale (essentially index by sizeof(size))
+ * Rt: register to transfer into/out of
+ * Rn: address register or SP for base
+ * Rm: offset register or ZR for offset
+ */
+static void handle_ldst_reg_roffset(DisasContext *s, uint32_t insn)
+{
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int shift = extract32(insn, 12, 1);
+    int rm = extract32(insn, 16, 5);
+    int opc = extract32(insn, 22, 2);
+    int opt = extract32(insn, 13, 3);
+    int size = extract32(insn, 30, 2);
+    bool is_signed = false;
+    bool is_store = false;
+    bool is_extended = false;
+    bool is_vector = extract32(insn, 26, 1);
+
+    TCGv_i64 tcg_rm;
+    TCGv_i64 tcg_addr;
+
+    if (extract32(opt, 1, 1) == 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (is_vector) {
+        size |= (opc & 2) << 1;
+        if (size > 4) {
+            unallocated_encoding(s);
+        }
+        is_store = ((opc & 1) == 0);
+    } else {
+        if (size == 3 && opc == 2) {
+            /* PRFM - prefetch */
+            return;
+        }
+        is_store = (opc == 0);
+        is_signed = opc & (1<<1);
+        is_extended = (size < 3) && (opc & 1);
+    }
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+
+    tcg_rm = read_cpu_reg(s, rm, 1);
+    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
+
+    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
+
+    if (is_vector) {
+        if (is_store) {
+            do_fp_st(s, rt, tcg_addr, size);
+        } else {
+            do_fp_ld(s, rt, tcg_addr, size);
+        }
+    } else {
+        TCGv_i64 tcg_rt = cpu_reg(s, rt);
+        if (is_store) {
+            do_gpr_st(s, tcg_rt, tcg_addr, size);
+        } else {
+            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
+        }
+    }
+}
+
 /*
  * C3.3.13 Load/store (unsigned immediate)
  *
@@ -1125,7 +1260,11 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
 {
     switch (extract32(insn, 24, 2)) {
     case 0:
-        unsupported_encoding(s, insn);
+        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
+            handle_ldst_reg_roffset(s, insn);
+        } else {
+            unsupported_encoding(s, insn);
+        }
         break;
     case 1:
         handle_ldst_reg_unsigned_imm(s, insn);