Message ID | 20200312145900.2054-23-zhiwei_liu@c-sky.com |
---|---|
State | New |
Headers | show |
Series | target/riscv: support vector extension v0.7.1 | expand |
On 3/12/20 7:58 AM, LIU Zhiwei wrote: > +/* Vector Integer Merge and Move Instructions */ > +static bool opivv_vmerge_check(DisasContext *s, arg_rmrr *a) > +{ > + return (vext_check_isa_ill(s, RVV) && > + vext_check_overlap_mask(s, a->rd, a->vm, false) && > + vext_check_reg(s, a->rd, false) && > + vext_check_reg(s, a->rs2, false) && > + vext_check_reg(s, a->rs1, false) && > + ((a->vm == 0) || (a->rs2 == 0))); > +} > +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vmerge_check) > + > +static bool opivx_vmerge_check(DisasContext *s, arg_rmrr *a) > +{ > + return (vext_check_isa_ill(s, RVV) && > + vext_check_overlap_mask(s, a->rd, a->vm, false) && > + vext_check_reg(s, a->rd, false) && > + vext_check_reg(s, a->rs2, false) && > + ((a->vm == 0) || (a->rs2 == 0))); > +} > +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vmerge_check) > + > +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vmerge_check) I think you need to special case these. The unmasked instructions are the canonical move instructions: vmv.v.*. You definitely want to use tcg_gen_gvec_mov (vv), tcg_gen_gvec_dup_i{32,64} (vx) and tcg_gen_gvec_dup{8,16,32,64}i (vi). > + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ > + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ > + *((ETYPE *)vd + H1(i)) = s2; \ > + } else { \ > + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ > + *((ETYPE *)vd + H(i)) = s1; \ > + } \ Perhaps better as ETYPE *vt = (!vm && !vext_elem_mask(v0, mlen, i) ? vs2 : vs1); *((ETYPE *)vd + H(i)) = *((ETYPE *)vt + H(i)); > + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ > + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ > + *((ETYPE *)vd + H1(i)) = s2; \ > + } else { \ > + *((ETYPE *)vd + H(i)) = (ETYPE)(target_long)s1; \ > + } \ Perhaps better as ETYPE s2 = *((ETYPE *)vs2 + H(i)); ETYPE d = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : (ETYPE)(target_long)s1); *((ETYPE *)vd + H(i)) = d; as most host platforms have a conditional reg-reg move, but not a conditional load. r~
On 2020/3/14 15:27, Richard Henderson wrote: > On 3/12/20 7:58 AM, LIU Zhiwei wrote: >> +/* Vector Integer Merge and Move Instructions */ >> +static bool opivv_vmerge_check(DisasContext *s, arg_rmrr *a) >> +{ >> + return (vext_check_isa_ill(s, RVV) && >> + vext_check_overlap_mask(s, a->rd, a->vm, false) && >> + vext_check_reg(s, a->rd, false) && >> + vext_check_reg(s, a->rs2, false) && >> + vext_check_reg(s, a->rs1, false) && >> + ((a->vm == 0) || (a->rs2 == 0))); >> +} >> +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vmerge_check) >> + >> +static bool opivx_vmerge_check(DisasContext *s, arg_rmrr *a) >> +{ >> + return (vext_check_isa_ill(s, RVV) && >> + vext_check_overlap_mask(s, a->rd, a->vm, false) && >> + vext_check_reg(s, a->rd, false) && >> + vext_check_reg(s, a->rs2, false) && >> + ((a->vm == 0) || (a->rs2 == 0))); >> +} >> +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vmerge_check) >> + >> +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vmerge_check) > I think you need to special case these. The unmasked instructions are the > canonical move instructions: vmv.v.*. > > You definitely want to use tcg_gen_gvec_mov (vv), tcg_gen_gvec_dup_i{32,64} > (vx) and tcg_gen_gvec_dup{8,16,32,64}i (vi). I have a question here. Are these GVEC IRs proper for any vl, or just when vl equals vlmax? I see there are some align assert in these GVEC IR. Now the code is like static bool trans_vmv_v_v(DisasContext *s, arg_r *a) { if (vext_check_isa_ill(s, RVV) && vext_check_reg(s, a->rd, false) && vext_check_reg(s, a->rs1, false)) { if (s->vl_eq_vlmax) { tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), MAXSZ(s), MAXSZ(s)); } else { uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); static gen_helper_gvec_2_ptr * const fns[4] = { gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, }; tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), cpu_env, 0, s->vlen / 8, data, fns[s->sew]); } return true; } return false; } Is it right? 
Zhiwei > >> + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ >> + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ >> + *((ETYPE *)vd + H1(i)) = s2; \ >> + } else { \ >> + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ >> + *((ETYPE *)vd + H(i)) = s1; \ >> + } \ > Perhaps better as > > ETYPE *vt = (!vm && !vext_elem_mask(v0, mlen, i) ? vs2 : vs1); > *((ETYPE *)vd + H(i)) = *((ETYPE *)vt + H(i)); > >> + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ >> + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ >> + *((ETYPE *)vd + H1(i)) = s2; \ >> + } else { \ >> + *((ETYPE *)vd + H(i)) = (ETYPE)(target_long)s1; \ >> + } \ > Perhaps better as > > ETYPE s2 = *((ETYPE *)vs2 + H(i)); > ETYPE d = (!vm && !vext_elem_mask(v0, mlen, i) > ? s2 : (ETYPE)(target_long)s1); > *((ETYPE *)vd + H(i)) = d; > > as most host platforms have a conditional reg-reg move, but not a conditional load. > > > r~
On 3/15/20 7:57 PM, LIU Zhiwei wrote: >> You definitely want to use tcg_gen_gvec_mov (vv), tcg_gen_gvec_dup_i{32,64} >> (vx) and tcg_gen_gvec_dup{8,16,32,64}i (vi). > I have a question here. > > Are these GVEC IRs proper for any vl, or just when vl equals vlmax? > I see there are some align assert in these GVEC IR. Only vl_eq_vlmax. I should have been more precise. But I expect this boolean to be true quite often. > > Now the code is like > > static bool trans_vmv_v_v(DisasContext *s, arg_r *a) > { > if (vext_check_isa_ill(s, RVV) && > vext_check_reg(s, a->rd, false) && > vext_check_reg(s, a->rs1, false)) { > > if (s->vl_eq_vlmax) { > tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), > vreg_ofs(s, a->rs1), > MAXSZ(s), MAXSZ(s)); > } else { > uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); > static gen_helper_gvec_2_ptr * const fns[4] = { > gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, > gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, > }; > > tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), > cpu_env, 0, s->vlen / 8, data, fns[s->sew]); > } > return true; > } > return false; > } > > Is it right? Yes, that looks fine. r~
diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 1f0d3d60e3..121e9e57e7 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -665,3 +665,12 @@ DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 2a5b945139..bcb8273bcc 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -399,6 +399,9 @@ vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 
1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index 958737d097..aff5ca8663 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -1481,3 +1481,27 @@ GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) + +/* Vector Integer Merge and Move Instructions */ +static bool opivv_vmerge_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s, RVV) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((a->vm == 0) || (a->rs2 == 0))); +} +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vmerge_check) + +static bool opivx_vmerge_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s, RVV) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((a->vm == 0) || (a->rs2 == 0))); +} +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vmerge_check) + +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vmerge_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 5109654f9f..273b705847 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1955,3 +1955,61 @@ GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) + +/* Vector Integer Merge and Move Instructions */ +#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { 
\ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + *((ETYPE *)vd + H1(i)) = s2; \ + } else { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + *((ETYPE *)vd + H(i)) = s1; \ + } \ + } \ + if (i != 0) { \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ + } \ +} +GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + *((ETYPE *)vd + H1(i)) = s2; \ + } else { \ + *((ETYPE *)vd + H(i)) = (ETYPE)(target_long)s1; \ + } \ + } \ + if (i != 0) { \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ + } \ +} +GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq)
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> --- target/riscv/helper.h | 9 ++++ target/riscv/insn32.decode | 3 ++ target/riscv/insn_trans/trans_rvv.inc.c | 24 ++++++++++ target/riscv/vector_helper.c | 58 +++++++++++++++++++++++++ 4 files changed, 94 insertions(+)