diff mbox series

[v6,55/61] target/riscv: integer extract instruction

Message ID 20200317150653.9008-56-zhiwei_liu@c-sky.com
State New
Headers show
Series target/riscv: support vector extension v0.7.1 | expand

Commit Message

LIU Zhiwei March 17, 2020, 3:06 p.m. UTC
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/insn32.decode              |  1 +
 target/riscv/insn_trans/trans_rvv.inc.c | 91 +++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

Comments

Richard Henderson March 28, 2020, 3:36 a.m. UTC | #1
On 3/17/20 8:06 AM, LIU Zhiwei wrote:
> +/* Integer Extract Instruction */
> +static void extract_element(TCGv dest, TCGv_ptr base,
> +                            int ofs, int sew)
> +{
> +    switch (sew) {
> +    case MO_8:
> +        tcg_gen_ld8u_tl(dest, base, ofs);
> +        break;
> +    case MO_16:
> +        tcg_gen_ld16u_tl(dest, base, ofs);
> +        break;
> +    default:
> +        tcg_gen_ld32u_tl(dest, base, ofs);
> +        break;
> +#if TARGET_LONG_BITS == 64
> +    case MO_64:
> +        tcg_gen_ld_i64(dest, base, ofs);
> +        break;
> +#endif
> +    }
> +}

I just remembered that this doesn't handle HOST_WORDS_BIGENDIAN properly -- the
MO_64 case for TARGET_LONG_BITS == 32.

Because we computed the offset for MO_64, not MO_32, we need

    case MO_64:
        if (TARGET_LONG_BITS == 64) {
            tcg_gen_ld_i64(dest, base, ofs);
            break;
        }
#ifdef HOST_WORDS_BIGENDIAN
        ofs += 4;
#endif
        /* fall through */
    case MO_32:
        tcg_gen_ld32u_tl(dest, base, ofs);
        break;
    default:
        g_assert_not_reached();


r~
LIU Zhiwei March 28, 2020, 4:23 p.m. UTC | #2
On 2020/3/28 11:36, Richard Henderson wrote:
> On 3/17/20 8:06 AM, LIU Zhiwei wrote:
>> +/* Integer Extract Instruction */
>> +static void extract_element(TCGv dest, TCGv_ptr base,
>> +                            int ofs, int sew)
>> +{
>> +    switch (sew) {
>> +    case MO_8:
>> +        tcg_gen_ld8u_tl(dest, base, ofs);
>> +        break;
>> +    case MO_16:
>> +        tcg_gen_ld16u_tl(dest, base, ofs);
>> +        break;
>> +    default:
>> +        tcg_gen_ld32u_tl(dest, base, ofs);
>> +        break;
>> +#if TARGET_LONG_BITS == 64
>> +    case MO_64:
>> +        tcg_gen_ld_i64(dest, base, ofs);
>> +        break;
>> +#endif
>> +    }
>> +}
> I just remembered that this doesn't handle HOST_WORDS_BIGENDIAN properly -- the
> MO_64 case for TARGET_LONG_BITS == 32.
>
> Because we computed the offset for MO_64, not MO_32, we need
>
>      case MO_64:
>          if (TARGET_LONG_BITS == 64) {
>              tcg_gen_ld_i64(dest, base, ofs);
>              break;
>          }
> #ifdef HOST_WORDS_BIGENDIAN
>          ofs += 4;
> #endif
>          /* fall through */
>      case MO_32:
>          tcg_gen_ld32u_tl(dest, base, ofs);
>          break;
>      default:
>          g_assert_not_reached();
Yes, it should be.

As extract_element and gather_element are very similar . I
will merge them to  load_element in v7.

Zhiwei
>
> r~
diff mbox series

Patch

diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 1231628cb2..26dd0f1b1b 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -561,6 +561,7 @@  vmsif_m         010110 . ..... 00011 010 ..... 1010111 @r2_vm
 vmsof_m         010110 . ..... 00010 010 ..... 1010111 @r2_vm
 viota_m         010110 . ..... 10000 010 ..... 1010111 @r2_vm
 vid_v           010110 . 00000 10001 010 ..... 1010111 @r1_vm
+vext_x_v        001100 1 ..... ..... 010 ..... 1010111 @r
 
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index fae72acaa1..4d7bb6b54e 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -2347,3 +2347,94 @@  static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
     }
     return false;
 }
+
+/*
+ *** Vector Permutation Instructions
+ */
+/* Integer Extract Instruction */
+static void extract_element(TCGv dest, TCGv_ptr base,
+                            int ofs, int sew)
+{
+    switch (sew) {
+    case MO_8:
+        tcg_gen_ld8u_tl(dest, base, ofs);
+        break;
+    case MO_16:
+        tcg_gen_ld16u_tl(dest, base, ofs);
+        break;
+    default:
+        tcg_gen_ld32u_tl(dest, base, ofs);
+        break;
+#if TARGET_LONG_BITS == 64
+    case MO_64:
+        tcg_gen_ld_i64(dest, base, ofs);
+        break;
+#endif
+    }
+}
+
+/* offset of the idx element with base regsiter r */
+static uint32_t endian_ofs(DisasContext *s, int r, int idx)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
+#else
+    return vreg_ofs(s, r) + (idx << s->sew);
+#endif
+}
+
+/* adjust the index according to the endian */
+static void endian_adjust(TCGv_i32 ofs, int sew)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
+#endif
+}
+
+static bool trans_vext_x_v(DisasContext *s, arg_r *a)
+{
+    TCGv dest = tcg_temp_new();
+
+    if (a->rs1 == 0) {
+        /* Special case vmv.x.s rd, vs2. */
+        extract_element(dest, cpu_env,
+                        endian_ofs(s, a->rs2, 0), s->sew);
+    } else {
+        int vlen = s->vlen >> (3 + s->sew);
+        TCGv_i32 ofs = tcg_temp_new_i32();
+        TCGv_ptr  base = tcg_temp_new_ptr();
+        TCGv t_vlen, t_zero;
+
+        /*
+         * Mask the index to the length so that we do
+         * not produce an out-of-range load.
+         */
+        tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]);
+        tcg_gen_andi_i32(ofs, ofs, vlen - 1);
+
+        /* Convert the index to an offset. */
+        endian_adjust(ofs, s->sew);
+        tcg_gen_shli_i32(ofs, ofs, s->sew);
+
+        /* Convert the index to a pointer. */
+        tcg_gen_ext_i32_ptr(base, ofs);
+        tcg_gen_add_ptr(base, base, cpu_env);
+
+        /* Perform the load. */
+        extract_element(dest, base,
+                        vreg_ofs(s, a->rs2), s->sew);
+        tcg_temp_free_ptr(base);
+        tcg_temp_free_i32(ofs);
+
+        /* Flush out-of-range indexing to zero.  */
+        t_vlen = tcg_const_tl(vlen);
+        t_zero = tcg_const_tl(0);
+        tcg_gen_movcond_tl(TCG_COND_LTU, dest, cpu_gpr[a->rs1],
+                           t_vlen, dest, t_zero);
+        tcg_temp_free(t_vlen);
+        tcg_temp_free(t_zero);
+    }
+    gen_set_gpr(a->rd, dest);
+    tcg_temp_free(dest);
+    return true;
+}