diff mbox series

[v5,22/60] target/riscv: vector integer merge and move instructions

Message ID 20200312145900.2054-23-zhiwei_liu@c-sky.com
State New
Headers show
Series target/riscv: support vector extension v0.7.1 | expand

Commit Message

LIU Zhiwei March 12, 2020, 2:58 p.m. UTC
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  9 ++++
 target/riscv/insn32.decode              |  3 ++
 target/riscv/insn_trans/trans_rvv.inc.c | 24 ++++++++++
 target/riscv/vector_helper.c            | 58 +++++++++++++++++++++++++
 4 files changed, 94 insertions(+)

Comments

Richard Henderson March 14, 2020, 7:27 a.m. UTC | #1
On 3/12/20 7:58 AM, LIU Zhiwei wrote:
> +/* Vector Integer Merge and Move Instructions */
> +static bool opivv_vmerge_check(DisasContext *s, arg_rmrr *a)
> +{
> +    return (vext_check_isa_ill(s, RVV) &&
> +            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
> +            vext_check_reg(s, a->rd, false) &&
> +            vext_check_reg(s, a->rs2, false) &&
> +            vext_check_reg(s, a->rs1, false) &&
> +            ((a->vm == 0) || (a->rs2 == 0)));
> +}
> +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vmerge_check)
> +
> +static bool opivx_vmerge_check(DisasContext *s, arg_rmrr *a)
> +{
> +    return (vext_check_isa_ill(s, RVV) &&
> +            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
> +            vext_check_reg(s, a->rd, false) &&
> +            vext_check_reg(s, a->rs2, false) &&
> +            ((a->vm == 0) || (a->rs2 == 0)));
> +}
> +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vmerge_check)
> +
> +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vmerge_check)

I think you need to special case these.  The unmasked instructions are the
canonical move instructions: vmv.v.*.

You definitely want to use tcg_gen_gvec_mov (vv), tcg_gen_gvec_dup_i{32,64}
(vx) and tcg_gen_gvec_dup{8,16,32,64}i (vi).

> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {                   \
> +            ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
> +            *((ETYPE *)vd + H1(i)) = s2;                             \
> +        } else {                                                     \
> +            ETYPE s1 = *((ETYPE *)vs1 + H(i));                       \
> +            *((ETYPE *)vd + H(i)) = s1;                              \
> +        }                                                            \

Perhaps better as

ETYPE *vt = (!vm && !vext_elem_mask(v0, mlen, i) ? vs2 : vs1);
*((ETYPE *)vd + H(i)) = *((ETYPE *)vt + H(i));

> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {                   \
> +            ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
> +            *((ETYPE *)vd + H1(i)) = s2;                             \
> +        } else {                                                     \
> +            *((ETYPE *)vd + H(i)) = (ETYPE)(target_long)s1;          \
> +        }                                                            \

Perhaps better as

ETYPE s2 = *((ETYPE *)vs2 + H(i));
ETYPE d = (!vm && !vext_elem_mask(v0, mlen, i)
           ? s2 : (ETYPE)(target_long)s1);
*((ETYPE *)vd + H(i)) = d;

as most host platforms have a conditional reg-reg move, but not a conditional load.


r~
LIU Zhiwei March 16, 2020, 2:57 a.m. UTC | #2
On 2020/3/14 15:27, Richard Henderson wrote:
> On 3/12/20 7:58 AM, LIU Zhiwei wrote:
>> +/* Vector Integer Merge and Move Instructions */
>> +static bool opivv_vmerge_check(DisasContext *s, arg_rmrr *a)
>> +{
>> +    return (vext_check_isa_ill(s, RVV) &&
>> +            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
>> +            vext_check_reg(s, a->rd, false) &&
>> +            vext_check_reg(s, a->rs2, false) &&
>> +            vext_check_reg(s, a->rs1, false) &&
>> +            ((a->vm == 0) || (a->rs2 == 0)));
>> +}
>> +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vmerge_check)
>> +
>> +static bool opivx_vmerge_check(DisasContext *s, arg_rmrr *a)
>> +{
>> +    return (vext_check_isa_ill(s, RVV) &&
>> +            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
>> +            vext_check_reg(s, a->rd, false) &&
>> +            vext_check_reg(s, a->rs2, false) &&
>> +            ((a->vm == 0) || (a->rs2 == 0)));
>> +}
>> +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vmerge_check)
>> +
>> +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vmerge_check)
> I think you need to special case these.  The unmasked instructions are the
> canonical move instructions: vmv.v.*.
>
> You definitely want to use tcg_gen_gvec_mov (vv), tcg_gen_gvec_dup_i{32,64}
> (vx) and tcg_gen_gvec_dup{8,16,32,64}i (vi).
I have a question here.

Are these GVEC IRs proper for any vl, or just when vl equals vlmax?
I see there are some alignment asserts in these GVEC IRs.

Now the code is like

static bool trans_vmv_v_v(DisasContext *s, arg_r *a)
{
     if (vext_check_isa_ill(s, RVV) &&
         vext_check_reg(s, a->rd, false) &&
         vext_check_reg(s, a->rs1, false)) {

         if (s->vl_eq_vlmax) {
             tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
                              vreg_ofs(s, a->rs1),
                              MAXSZ(s), MAXSZ(s));
         } else {
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
             static gen_helper_gvec_2_ptr * const fns[4] = {
                 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
                 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
             };

             tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
                                cpu_env, 0, s->vlen / 8, data, fns[s->sew]);
         }
         return true;
     }
     return false;
}

Is it right?

Zhiwei
>
>> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {                   \
>> +            ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
>> +            *((ETYPE *)vd + H1(i)) = s2;                             \
>> +        } else {                                                     \
>> +            ETYPE s1 = *((ETYPE *)vs1 + H(i));                       \
>> +            *((ETYPE *)vd + H(i)) = s1;                              \
>> +        }                                                            \
> Perhaps better as
>
> ETYPE *vt = (!vm && !vext_elem_mask(v0, mlen, i) ? vs2 : vs1);
> *((ETYPE *)vd + H(i)) = *((ETYPE *)vt + H(i));
>
>> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {                   \
>> +            ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
>> +            *((ETYPE *)vd + H1(i)) = s2;                             \
>> +        } else {                                                     \
>> +            *((ETYPE *)vd + H(i)) = (ETYPE)(target_long)s1;          \
>> +        }                                                            \
> Perhaps better as
>
> ETYPE s2 = *((ETYPE *)vs2 + H(i));
> ETYPE d = (!vm && !vext_elem_mask(v0, mlen, i)
>             ? s2 : (ETYPE)(target_long)s1);
> *((ETYPE *)vd + H(i)) = d;
>
> as most host platforms have a conditional reg-reg move, but not a conditional load.
>
>
> r~
Richard Henderson March 16, 2020, 5:32 a.m. UTC | #3
On 3/15/20 7:57 PM, LIU Zhiwei wrote:
>> You definitely want to use tcg_gen_gvec_mov (vv), tcg_gen_gvec_dup_i{32,64}
>> (vx) and tcg_gen_gvec_dup{8,16,32,64}i (vi).
> I have a question here.
> 
> Are these GVEC IRs proper for any vl, or just when vl equals vlmax?
> I see there are some alignment asserts in these GVEC IRs.

Only vl_eq_vlmax.  I should have been more precise.
But I expect this boolean to be true quite often.

> 
> Now the code is like
> 
> static bool trans_vmv_v_v(DisasContext *s, arg_r *a)
> {
>     if (vext_check_isa_ill(s, RVV) &&
>         vext_check_reg(s, a->rd, false) &&
>         vext_check_reg(s, a->rs1, false)) {
> 
>         if (s->vl_eq_vlmax) {
>             tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
>                              vreg_ofs(s, a->rs1),
>                              MAXSZ(s), MAXSZ(s));
>         } else {
>             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
>             static gen_helper_gvec_2_ptr * const fns[4] = {
>                 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
>                 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
>             };
> 
>             tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
>                                cpu_env, 0, s->vlen / 8, data, fns[s->sew]);
>         }
>         return true;
>     }
>     return false;
> }
> 
> Is it right?

Yes, that looks fine.


r~
diff mbox series

Patch

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 1f0d3d60e3..121e9e57e7 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -665,3 +665,12 @@  DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 2a5b945139..bcb8273bcc 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -399,6 +399,9 @@  vwmacc_vx       111101 . ..... ..... 110 ..... 1010111 @r_vm
 vwmaccsu_vv     111110 . ..... ..... 010 ..... 1010111 @r_vm
 vwmaccsu_vx     111110 . ..... ..... 110 ..... 1010111 @r_vm
 vwmaccus_vx     111111 . ..... ..... 110 ..... 1010111 @r_vm
+vmerge_vvm      010111 . ..... ..... 000 ..... 1010111 @r_vm
+vmerge_vxm      010111 . ..... ..... 100 ..... 1010111 @r_vm
+vmerge_vim      010111 . ..... ..... 011 ..... 1010111 @r_vm
 
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 958737d097..aff5ca8663 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -1481,3 +1481,27 @@  GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
 GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
 GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
 GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
+
+/* Vector Integer Merge and Move Instructions */
+static bool opivv_vmerge_check(DisasContext *s, arg_rmrr *a)
+{
+    return (vext_check_isa_ill(s, RVV) &&
+            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
+            vext_check_reg(s, a->rd, false) &&
+            vext_check_reg(s, a->rs2, false) &&
+            vext_check_reg(s, a->rs1, false) &&
+            ((a->vm == 0) || (a->rs2 == 0)));
+}
+GEN_OPIVV_TRANS(vmerge_vvm, opivv_vmerge_check)
+
+static bool opivx_vmerge_check(DisasContext *s, arg_rmrr *a)
+{
+    return (vext_check_isa_ill(s, RVV) &&
+            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
+            vext_check_reg(s, a->rd, false) &&
+            vext_check_reg(s, a->rs2, false) &&
+            ((a->vm == 0) || (a->rs2 == 0)));
+}
+GEN_OPIVX_TRANS(vmerge_vxm, opivx_vmerge_check)
+
+GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vmerge_check)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 5109654f9f..273b705847 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1955,3 +1955,61 @@  GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq)
 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh)
 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl)
 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq)
+
+/* Vector Integer Merge and Move Instructions */
+#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN)                 \
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
+        CPURISCVState *env, uint32_t desc)                           \
+{                                                                    \
+    uint32_t mlen = vext_mlen(desc);                                 \
+    uint32_t vm = vext_vm(desc);                                     \
+    uint32_t vl = env->vl;                                           \
+    uint32_t esz = sizeof(ETYPE);                                    \
+    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
+    uint32_t i;                                                      \
+                                                                     \
+    for (i = 0; i < vl; i++) {                                       \
+        if (!vm && !vext_elem_mask(v0, mlen, i)) {                   \
+            ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
+            *((ETYPE *)vd + H1(i)) = s2;                             \
+        } else {                                                     \
+            ETYPE s1 = *((ETYPE *)vs1 + H(i));                       \
+            *((ETYPE *)vd + H(i)) = s1;                              \
+        }                                                            \
+    }                                                                \
+    if (i != 0) {                                                    \
+        CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                     \
+    }                                                                \
+}
+GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1, clearb)
+GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh)
+GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl)
+GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq)
+
+#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN)                 \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
+        void *vs2, CPURISCVState *env, uint32_t desc)                \
+{                                                                    \
+    uint32_t mlen = vext_mlen(desc);                                 \
+    uint32_t vm = vext_vm(desc);                                     \
+    uint32_t vl = env->vl;                                           \
+    uint32_t esz = sizeof(ETYPE);                                    \
+    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
+    uint32_t i;                                                      \
+                                                                     \
+    for (i = 0; i < vl; i++) {                                       \
+        if (!vm && !vext_elem_mask(v0, mlen, i)) {                   \
+            ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
+            *((ETYPE *)vd + H1(i)) = s2;                             \
+        } else {                                                     \
+            *((ETYPE *)vd + H(i)) = (ETYPE)(target_long)s1;          \
+        }                                                            \
+    }                                                                \
+    if (i != 0) {                                                    \
+        CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                     \
+    }                                                                \
+}
+GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1, clearb)
+GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh)
+GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl)
+GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq)