diff mbox series

[RFC,v2,43/76] target/riscv: rvv-0.9: integer extension instructions

Message ID 20200722091641.8834-44-frank.chang@sifive.com
State New
Headers show
Series target/riscv: support vector extension v0.9 | expand

Commit Message

Frank Chang July 22, 2020, 9:16 a.m. UTC
From: Frank Chang <frank.chang@sifive.com>

Add the following instructions:

* vzext.vf2
* vzext.vf4
* vzext.vf8
* vsext.vf2
* vsext.vf4
* vsext.vf8

Signed-off-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/helper.h                   | 14 ++++
 target/riscv/insn32.decode              |  8 +++
 target/riscv/insn_trans/trans_rvv.inc.c | 87 +++++++++++++++++++++++++
 target/riscv/vector_helper.c            | 34 ++++++++++
 4 files changed, 143 insertions(+)

Comments

Richard Henderson July 30, 2020, 8:35 p.m. UTC | #1
On 7/22/20 2:16 AM, frank.chang@sifive.com wrote:
> +    static gen_helper_gvec_3_ptr * const fns[6][4] = {
> +        {
> +            NULL, gen_helper_vzext_vf2_h,
> +            gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d
> +        },
> +        {
> +            NULL, NULL,
> +            gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d,
> +        },
> +        {
> +            NULL, NULL,
> +            NULL, gen_helper_vzext_vf8_d
> +        },
> +        {
> +            NULL, gen_helper_vsext_vf2_h,
> +            gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d
> +        },
> +        {
> +            NULL, NULL,
> +            gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d,
> +        },
> +        {
> +            NULL, NULL,
> +            NULL, gen_helper_vsext_vf8_d
> +        }
> +    };

I don't understand why there aren't more functions in this table.  As far as I
can see, the only NULLs should be at [*][0].


r~
Frank Chang July 31, 2020, 10:17 a.m. UTC | #2
On Fri, Jul 31, 2020 at 4:35 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 7/22/20 2:16 AM, frank.chang@sifive.com wrote:
> > +    static gen_helper_gvec_3_ptr * const fns[6][4] = {
> > +        {
> > +            NULL, gen_helper_vzext_vf2_h,
> > +            gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d
> > +        },
> > +        {
> > +            NULL, NULL,
> > +            gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d,
> > +        },
> > +        {
> > +            NULL, NULL,
> > +            NULL, gen_helper_vzext_vf8_d
> > +        },
> > +        {
> > +            NULL, gen_helper_vsext_vf2_h,
> > +            gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d
> > +        },
> > +        {
> > +            NULL, NULL,
> > +            gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d,
> > +        },
> > +        {
> > +            NULL, NULL,
> > +            NULL, gen_helper_vsext_vf8_d
> > +        }
> > +    };
>
> I don't understand why there aren't more functions in this table.  As far
> as I
> can see, the only NULLs should be at [*][0].
>
>
> r~
>

As source EEW has to be 1/2, 1/4, 1/8 of SEW and the source EEW must be
a supported width (Section 12.3).

Shouldn't it be impossible to have these cases, e.g.
vzext.vf4 with SEW = 16, i.e. EEW = SEW / 4 = 4 bits
vzext.vf8 with SEW = 16, i.e. EEW = SEW / 8 = 2 bits
vzext.vf8 with SEW = 32, i.e. EEW = SEW / 8 = 4 bits

Frank Chang
Richard Henderson July 31, 2020, 5:30 p.m. UTC | #3
On 7/31/20 3:17 AM, Frank Chang wrote:
> On Fri, Jul 31, 2020 at 4:35 AM Richard Henderson <richard.henderson@linaro.org
>     I don't understand why there aren't more functions in this table.  As far as I
>     can see, the only NULLs should be at [*][0].
> 
> 
> As source EEW has to be 1/2, 1/4, 1/8 of SEW and the source EEW must be
> a supported width (Section 12.3).

Oh, duh, of course.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
diff mbox series

Patch

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 1dea171599..7eca91e510 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1097,4 +1097,18 @@  DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
 
+DEF_HELPER_5(vzext_vf2_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf2_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf2_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf4_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf4_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf8_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vsext_vf2_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf2_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf2_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf4_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf4_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf8_d, void, ptr, ptr, ptr, env, i32)
+
 DEF_HELPER_3(narrower_nanbox_fpr, i64, i64, i32, env)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 0e1d6b3ead..5c31936a92 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -600,5 +600,13 @@  vmv2r_v         100111 1 ..... 00001 011 ..... 1010111 @r2rd
 vmv4r_v         100111 1 ..... 00011 011 ..... 1010111 @r2rd
 vmv8r_v         100111 1 ..... 00111 011 ..... 1010111 @r2rd
 
+# Vector Integer Extension
+vzext_vf2       010010 . ..... 00110 010 ..... 1010111 @r2_vm
+vzext_vf4       010010 . ..... 00100 010 ..... 1010111 @r2_vm
+vzext_vf8       010010 . ..... 00010 010 ..... 1010111 @r2_vm
+vsext_vf2       010010 . ..... 00111 010 ..... 1010111 @r2_vm
+vsext_vf4       010010 . ..... 00101 010 ..... 1010111 @r2_vm
+vsext_vf8       010010 . ..... 00011 010 ..... 1010111 @r2_vm
+
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 85f22a1495..e18ca432b7 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -3554,3 +3554,90 @@  GEN_VMV_WHOLE_TRANS(vmv1r_v, 1)
 GEN_VMV_WHOLE_TRANS(vmv2r_v, 2)
 GEN_VMV_WHOLE_TRANS(vmv4r_v, 4)
 GEN_VMV_WHOLE_TRANS(vmv8r_v, 8)
+/* Translate-time checks for vzext/vsext.vf<div>; returns true if all pass. */
+static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
+{
+    uint32_t from = (1 << (s->sew + 3)) / div; /* source EEW, in bits */
+    bool ret = require_rvv(s);
+    ret &= (from >= 8 && from <= 64) && /* source EEW must be 8..64 bits */
+           (a->rd != a->rs2) && /* vd and vs2 must be different registers */
+           require_align(a->rd, s->flmul) &&
+           require_align(a->rs2, s->flmul / div) && /* source uses LMUL / div */
+           require_vm(a->vm, a->rd);
+    if ((s->flmul / div) < 1) { /* fractional source LMUL */
+        ret &= require_noover(a->rd, s->flmul, a->rs2, s->flmul / div);
+    } else {
+        ret &= require_noover_widen(a->rd, s->flmul, a->rs2, s->flmul / div);
+    }
+    return ret;
+}
+
+/*
+ * Emit the TCG ops for one vzext.vf<N> / vsext.vf<N> instruction.
+ *
+ * @seq picks the row of fns[] (0-2: vzext vf2/vf4/vf8, 3-5: vsext
+ * vf2/vf4/vf8) and s->sew picks the column.  Returns false, with no
+ * ops emitted, if that combination has no helper; returns true once
+ * the helper call has been generated.
+ */
+static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
+{
+    static gen_helper_gvec_3_ptr * const fns[6][4] = {
+        { NULL, gen_helper_vzext_vf2_h,
+          gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d },
+        { NULL, NULL, gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d },
+        { NULL, NULL, NULL, gen_helper_vzext_vf8_d },
+        { NULL, gen_helper_vsext_vf2_h,
+          gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d },
+        { NULL, NULL, gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d },
+        { NULL, NULL, NULL, gen_helper_vsext_vf8_d },
+    };
+    gen_helper_gvec_3_ptr *fn = fns[seq][s->sew];
+    uint32_t data = 0;
+    TCGLabel *over;
+
+    /*
+     * Look the helper up before emitting anything: returning false
+     * after the branch below has been emitted would leave it pointing
+     * at a label that is never placed.
+     */
+    if (fn == NULL) {
+        return false;
+    }
+
+    /* Nothing to do when vl == 0: branch straight past the helper. */
+    over = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+
+    /* Pack vm/lmul/vta/vma into the data word handed to the helper. */
+    data = FIELD_DP32(data, VDATA, VM, a->vm);
+    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+    data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
+
+    /* Helper operands: vd = a->rd, mask = v0, vs2 = a->rs2. */
+    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
+                       vreg_ofs(s, a->rs2), cpu_env, 0,
+                       s->vlen / 8, data, fn);
+
+    mark_vs_dirty(s);
+    gen_set_label(over);
+    return true;
+}
+
+/* Vector Integer Extension: trans_<NAME>() = int_ext_check + int_ext_op. */
+#define GEN_INT_EXT_TRANS(NAME, DIV, SEQ)             \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{                                                     \
+    if (int_ext_check(s, a, DIV)) {                   \
+        return int_ext_op(s, a, SEQ);                 \
+    }                                                 \
+    return false; /* operand check failed */          \
+}
+/* NAME, DIV = SEW / source EEW, SEQ = row of fns[] in int_ext_op. */
+GEN_INT_EXT_TRANS(vzext_vf2, 2, 0)
+GEN_INT_EXT_TRANS(vzext_vf4, 4, 1)
+GEN_INT_EXT_TRANS(vzext_vf8, 8, 2)
+GEN_INT_EXT_TRANS(vsext_vf2, 2, 3)
+GEN_INT_EXT_TRANS(vsext_vf4, 4, 4)
+GEN_INT_EXT_TRANS(vsext_vf8, 8, 5)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index c1ed0ff6ad..8516570e5f 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5002,3 +5002,37 @@  GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb)
 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh)
 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl)
 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq)
+
+/* Vector Integer Extension: widen each active vs2[i] (DTYPE) into vd[i]. */
+#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1, CLEAR_FN)       \
+void HELPER(NAME)(void *vd, void *v0, void *vs2,                      \
+                  CPURISCVState *env, uint32_t desc)                  \
+{                                                                     \
+    uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);      \
+    uint32_t vta = vext_vta(desc);                                    \
+    uint32_t vl = env->vl;                                            \
+    uint32_t vm = vext_vm(desc);                                      \
+    uint32_t i;                                                       \
+    /* ETYPE: destination element; DTYPE: narrower source */          \
+    for (i = 0; i < vl; i++) {                                        \
+        if (!vm && !vext_elem_mask(v0, i)) {                          \
+            continue; /* masked-off element: skip the store */        \
+        }                                                             \
+        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));            \
+    }                                                                 \
+    CLEAR_FN(vd, vta, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+}
+
+GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1, clearh)
+GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2, clearl)
+GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4, clearq)
+GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1, clearl)
+GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2, clearq)
+GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1, clearq)
+
+GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1, clearh)
+GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2, clearl)
+GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4, clearq)
+GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1, clearl)
+GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2, clearq)
+GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1, clearq)