Message ID | 20200330153633.15298-28-zhiwei_liu@c-sky.com |
---|---|
State | New |
Headers | show |
Series | target/riscv: support vector extension v0.7.1 | expand |
On Mon, Mar 30, 2020 at 9:31 AM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote: > > Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Alistair > --- > target/riscv/helper.h | 22 +++ > target/riscv/insn32.decode | 7 + > target/riscv/insn_trans/trans_rvv.inc.c | 9 ++ > target/riscv/vector_helper.c | 205 ++++++++++++++++++++++++ > 4 files changed, 243 insertions(+) > > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index e6cae1b59c..eb383787a2 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -745,3 +745,25 @@ DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) > + > +DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) > diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode > index 633f782fbf..2e0e66bdfa 100644 > --- a/target/riscv/insn32.decode > +++ b/target/riscv/insn32.decode > @@ -424,6 +424,13 @@ vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm > vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm > vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm > vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm > +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm > +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm > +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm > +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm > > vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm > vsetvl 1000000 ..... ..... 111 ..... 1010111 @r > diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c > index 08a8444b46..c67faba23c 100644 > --- a/target/riscv/insn_trans/trans_rvv.inc.c > +++ b/target/riscv/insn_trans/trans_rvv.inc.c > @@ -1782,3 +1782,12 @@ GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) > /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ > GEN_OPIVV_TRANS(vsmul_vv, opivv_check) > GEN_OPIVX_TRANS(vsmul_vx, opivx_check) > + > +/* Vector Widening Saturating Scaled Multiply-Add */ > +GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check) > +GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check) > +GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check) > +GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) > +GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) > +GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) > +GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index 23868fb1b2..b9c1cf5237 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c > @@ -2703,3 +2703,208 @@ GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) > GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) > GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) > GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) > + > +/* Vector Widening Saturating Scaled Multiply-Add */ > +static inline uint16_t > +vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, > + uint16_t c) > +{ > + uint8_t round; > + uint16_t res = (uint16_t)a * b; > + > + round = get_round(vxrm, res, 4); > + res = (res >> 4) + round; > + return saddu16(env, vxrm, c, res); > +} > + > +static inline uint32_t > +vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, > + uint32_t c) > +{ > + uint8_t round; > + uint32_t res = (uint32_t)a * b; > + > + round = get_round(vxrm, res, 8); > + res = (res >> 8) + round; > + return saddu32(env, vxrm, c, res); > +} > + > +static inline uint64_t > +vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, > + uint64_t c) > +{ > + uint8_t round; > + uint64_t res = (uint64_t)a * b; > + > + round = get_round(vxrm, res, 16); > + res = (res >> 16) + round; > + return saddu64(env, vxrm, c, res); > +} > + > +#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ > +static inline void \ > +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ > + CPURISCVState *env, int vxrm) \ > +{ \ > + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ > + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ > + TD d = *((TD *)vd + HD(i)); \ > + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ > +} > + > +RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) > +RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) > +RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) > +GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) > +GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) > +GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) > + > +#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ > +static inline void \ > +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ > + CPURISCVState *env, int vxrm) \ > +{ \ > + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ > + TD d = *((TD *)vd + HD(i)); \ > + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ > +} > + > +RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) > +RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) > +RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) > +GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) > +GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) > +GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) > + > +static inline int16_t > +vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) > +{ > + uint8_t round; > + int16_t res = (int16_t)a * b; > + > + round = get_round(vxrm, res, 4); > + res = (res >> 4) + round; > + return sadd16(env, vxrm, c, res); > +} > + > +static inline int32_t > +vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) > +{ > + uint8_t round; > + int32_t res = (int32_t)a * b; > + > + round = get_round(vxrm, res, 8); > + res = (res >> 8) + round; > + return sadd32(env, vxrm, c, res); > + > +} > + > +static inline int64_t > +vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) > +{ > + uint8_t round; > + int64_t res = (int64_t)a * b; > + > + round = get_round(vxrm, res, 16); > + res = (res >> 16) + round; > + return sadd64(env, vxrm, c, res); > +} > + > +RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) > +RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) > +RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) > +GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) > +GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) > +GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) > +RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) > +RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) > +RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) > +GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) > +GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) > +GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) > + > +static inline int16_t > +vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) > +{ > + uint8_t round; > + int16_t res = a * (int16_t)b; > + > + round = get_round(vxrm, res, 4); > + res = (res >> 4) + round; > + return ssub16(env, vxrm, c, res); > +} > + > +static inline int32_t > +vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) > +{ > + uint8_t round; > + int32_t res = a * (int32_t)b; > + > + round = get_round(vxrm, res, 8); > + res = (res >> 8) + round; > + return ssub32(env, vxrm, c, res); > +} > + > +static inline int64_t > +vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) > +{ > + uint8_t round; > + int64_t res = a * (int64_t)b; > + > + round = get_round(vxrm, res, 16); > + res = (res >> 16) + round; > + return ssub64(env, vxrm, c, res); > +} > + > +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) > +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) > +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) > +GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh) > +GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) > +GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) > +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) > +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) > +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) > +GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) > +GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) > +GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) > + > +static inline int16_t > +vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) > +{ > + uint8_t round; > + int16_t res = (int16_t)a * b; > + > + round = get_round(vxrm, res, 4); > + res = (res >> 4) + round; > + return ssub16(env, vxrm, c, res); > +} > + > +static inline int32_t > +vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) > +{ > + uint8_t round; > + int32_t res = (int32_t)a * b; > + > + round = get_round(vxrm, res, 8); > + res = (res >> 8) + round; > + return ssub32(env, vxrm, c, res); > +} > + > +static inline int64_t > +vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) > +{ > + uint8_t round; > + int64_t res = (int64_t)a * b; > + > + round = get_round(vxrm, res, 16); > + res = (res >> 16) + round; > + return ssub64(env, vxrm, c, res); > +} > + > +RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) > +RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) > +RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) > +GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) > +GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) > +GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) > -- > 2.23.0 >
diff --git a/target/riscv/helper.h b/target/riscv/helper.h index e6cae1b59c..eb383787a2 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -745,3 +745,25 @@ DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 633f782fbf..2e0e66bdfa 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -424,6 +424,13 @@ vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index 08a8444b46..c67faba23c 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -1782,3 +1782,12 @@ GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ GEN_OPIVV_TRANS(vsmul_vv, opivv_check) GEN_OPIVX_TRANS(vsmul_vx, opivx_check) + +/* Vector Widening Saturating Scaled Multiply-Add */ +GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 23868fb1b2..b9c1cf5237 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2703,3 +2703,208 @@ GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) + +/* Vector Widening Saturating Scaled Multiply-Add */ +static inline uint16_t +vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, + uint16_t c) +{ + uint8_t round; + uint16_t res = (uint16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return saddu16(env, vxrm, c, res); +} + +static inline uint32_t +vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, + uint32_t c) +{ + uint8_t round; + uint32_t res = (uint32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return saddu32(env, vxrm, c, res); +} + +static inline uint64_t +vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, + uint64_t c) +{ + uint8_t round; + uint64_t res = (uint64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return saddu64(env, vxrm, c, res); +} + +#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ +} + +RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) +GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) + +#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) +GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return sadd16(env, vxrm, c, res); +} + +static inline int32_t +vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return sadd32(env, vxrm, c, res); + +} + +static inline int64_t +vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return sadd64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) +RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) +RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) +GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) +RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) +RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) +GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = a * (int16_t)b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) +{ + uint8_t round; + int32_t res = a * (int32_t)b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = a * (int64_t)b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) +GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) +GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) +GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq)