diff mbox series

[17/38] target/riscv: Signed MSW 32x16 Multiply and Add Instructions

Message ID 20210212150256.885-18-zhiwei_liu@c-sky.com
State New
Headers show
Series target/riscv: support packed extension v0.9.2 | expand

Commit Message

LIU Zhiwei Feb. 12, 2021, 3:02 p.m. UTC
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  17 ++
 target/riscv/insn32.decode              |  17 ++
 target/riscv/insn_trans/trans_rvp.c.inc |  18 ++
 target/riscv/packed_helper.c            | 208 ++++++++++++++++++++++++
 4 files changed, 260 insertions(+)

Comments

Alistair Francis March 16, 2021, 4:01 p.m. UTC | #1
On Fri, Feb 12, 2021 at 10:38 AM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:
>
> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  target/riscv/helper.h                   |  17 ++
>  target/riscv/insn32.decode              |  17 ++
>  target/riscv/insn_trans/trans_rvp.c.inc |  18 ++
>  target/riscv/packed_helper.c            | 208 ++++++++++++++++++++++++
>  4 files changed, 260 insertions(+)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index 0bd21c8514..25aa07a7ff 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -1277,3 +1277,20 @@ DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl)
>  DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl)
>  DEF_HELPER_3(kwmmul, tl, env, tl, tl)
>  DEF_HELPER_3(kwmmul_u, tl, env, tl, tl)
> +
> +DEF_HELPER_3(smmwb, tl, env, tl, tl)
> +DEF_HELPER_3(smmwb_u, tl, env, tl, tl)
> +DEF_HELPER_3(smmwt, tl, env, tl, tl)
> +DEF_HELPER_3(smmwt_u, tl, env, tl, tl)
> +DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl)
> +DEF_HELPER_3(kmmwb2, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwt2, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl)
> +DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index e0be2790dc..6e63bab2d9 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -745,3 +745,20 @@ kmmsb      0100001  ..... ..... 001 ..... 1111111 @r
>  kmmsb_u    0101001  ..... ..... 001 ..... 1111111 @r
>  kwmmul     0110001  ..... ..... 001 ..... 1111111 @r
>  kwmmul_u   0111001  ..... ..... 001 ..... 1111111 @r
> +
> +smmwb      0100010  ..... ..... 001 ..... 1111111 @r
> +smmwb_u    0101010  ..... ..... 001 ..... 1111111 @r
> +smmwt      0110010  ..... ..... 001 ..... 1111111 @r
> +smmwt_u    0111010  ..... ..... 001 ..... 1111111 @r
> +kmmawb     0100011  ..... ..... 001 ..... 1111111 @r
> +kmmawb_u   0101011  ..... ..... 001 ..... 1111111 @r
> +kmmawt     0110011  ..... ..... 001 ..... 1111111 @r
> +kmmawt_u   0111011  ..... ..... 001 ..... 1111111 @r
> +kmmwb2     1000111  ..... ..... 001 ..... 1111111 @r
> +kmmwb2_u   1001111  ..... ..... 001 ..... 1111111 @r
> +kmmwt2     1010111  ..... ..... 001 ..... 1111111 @r
> +kmmwt2_u   1011111  ..... ..... 001 ..... 1111111 @r
> +kmmawb2    1100111  ..... ..... 001 ..... 1111111 @r
> +kmmawb2_u  1101111  ..... ..... 001 ..... 1111111 @r
> +kmmawt2    1110111  ..... ..... 001 ..... 1111111 @r
> +kmmawt2_u  1111111  ..... ..... 001 ..... 1111111 @r
> diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
> index fbc9c0b57b..e708ae7a6a 100644
> --- a/target/riscv/insn_trans/trans_rvp.c.inc
> +++ b/target/riscv/insn_trans/trans_rvp.c.inc
> @@ -564,3 +564,21 @@ GEN_RVP_R_ACC_OOL(kmmsb);
>  GEN_RVP_R_ACC_OOL(kmmsb_u);
>  GEN_RVP_R_OOL(kwmmul);
>  GEN_RVP_R_OOL(kwmmul_u);
> +
> +/* Most Significant Word "32x16" Multiply & Add Instructions */
> +GEN_RVP_R_OOL(smmwb);
> +GEN_RVP_R_OOL(smmwb_u);
> +GEN_RVP_R_OOL(smmwt);
> +GEN_RVP_R_OOL(smmwt_u);
> +GEN_RVP_R_ACC_OOL(kmmawb);
> +GEN_RVP_R_ACC_OOL(kmmawb_u);
> +GEN_RVP_R_ACC_OOL(kmmawt);
> +GEN_RVP_R_ACC_OOL(kmmawt_u);
> +GEN_RVP_R_OOL(kmmwb2);
> +GEN_RVP_R_OOL(kmmwb2_u);
> +GEN_RVP_R_OOL(kmmwt2);
> +GEN_RVP_R_OOL(kmmwt2_u);
> +GEN_RVP_R_ACC_OOL(kmmawb2);
> +GEN_RVP_R_ACC_OOL(kmmawb2_u);
> +GEN_RVP_R_ACC_OOL(kmmawt2);
> +GEN_RVP_R_ACC_OOL(kmmawt2_u);
> diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
> index c1322d2fac..ea3c9f6dd8 100644
> --- a/target/riscv/packed_helper.c
> +++ b/target/riscv/packed_helper.c
> @@ -1477,3 +1477,211 @@ static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va,
>  }
>
>  RVPR(kwmmul_u, 1, 4);
> +
> +/* Most Significant Word "32x16" Multiply & Add Instructions */
> +static inline void do_smmwb(CPURISCVState *env, void *vd, void *va,
> +                            void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16;
> +}
> +
> +RVPR(smmwb, 1, 4);
> +
> +static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16;
> +}
> +
> +RVPR(smmwb_u, 1, 4);
> +
> +static inline void do_smmwt(CPURISCVState *env, void *vd, void *va,
> +                            void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16;
> +}
> +
> +RVPR(smmwt, 1, 4);
> +
> +static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16;
> +}
> +
> +RVPR(smmwt_u, 1, 4);
> +
> +static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb, 1, 4);
> +
> +static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] +
> +                               (1ull << 15)) >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb_u, 1, 4);
> +
> +static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16,
> +                      c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt, 1, 4);
> +
> +static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] +
> +                               (1ull << 15)) >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt_u, 1, 4);
> +
> +static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
> +    }
> +}
> +
> +RVPR(kmmwb2, 1, 4);
> +
> +static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
> +    }
> +}
> +
> +RVPR(kmmwb2_u, 1, 4);
> +
> +static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
> +    }
> +}
> +
> +RVPR(kmmwt2, 1, 4);
> +
> +static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
> +    }
> +}
> +
> +RVPR(kmmwt2_u, 1, 4);
> +
> +static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb2, 1, 4);
> +
> +static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va,
> +                                void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb2_u, 1, 4);
> +
> +static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt2, 1, 4);
> +
> +static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
> +                                void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt2_u, 1, 4);
> --
> 2.17.1
>
diff mbox series

Patch

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 0bd21c8514..25aa07a7ff 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1277,3 +1277,20 @@  DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl)
 DEF_HELPER_3(kwmmul, tl, env, tl, tl)
 DEF_HELPER_3(kwmmul_u, tl, env, tl, tl)
+
+DEF_HELPER_3(smmwb, tl, env, tl, tl)
+DEF_HELPER_3(smmwb_u, tl, env, tl, tl)
+DEF_HELPER_3(smmwt, tl, env, tl, tl)
+DEF_HELPER_3(smmwt_u, tl, env, tl, tl)
+DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl)
+DEF_HELPER_3(kmmwb2, tl, env, tl, tl)
+DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl)
+DEF_HELPER_3(kmmwt2, tl, env, tl, tl)
+DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl)
+DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index e0be2790dc..6e63bab2d9 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -745,3 +745,20 @@  kmmsb      0100001  ..... ..... 001 ..... 1111111 @r
 kmmsb_u    0101001  ..... ..... 001 ..... 1111111 @r
 kwmmul     0110001  ..... ..... 001 ..... 1111111 @r
 kwmmul_u   0111001  ..... ..... 001 ..... 1111111 @r
+
+smmwb      0100010  ..... ..... 001 ..... 1111111 @r
+smmwb_u    0101010  ..... ..... 001 ..... 1111111 @r
+smmwt      0110010  ..... ..... 001 ..... 1111111 @r
+smmwt_u    0111010  ..... ..... 001 ..... 1111111 @r
+kmmawb     0100011  ..... ..... 001 ..... 1111111 @r
+kmmawb_u   0101011  ..... ..... 001 ..... 1111111 @r
+kmmawt     0110011  ..... ..... 001 ..... 1111111 @r
+kmmawt_u   0111011  ..... ..... 001 ..... 1111111 @r
+kmmwb2     1000111  ..... ..... 001 ..... 1111111 @r
+kmmwb2_u   1001111  ..... ..... 001 ..... 1111111 @r
+kmmwt2     1010111  ..... ..... 001 ..... 1111111 @r
+kmmwt2_u   1011111  ..... ..... 001 ..... 1111111 @r
+kmmawb2    1100111  ..... ..... 001 ..... 1111111 @r
+kmmawb2_u  1101111  ..... ..... 001 ..... 1111111 @r
+kmmawt2    1110111  ..... ..... 001 ..... 1111111 @r
+kmmawt2_u  1111111  ..... ..... 001 ..... 1111111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
index fbc9c0b57b..e708ae7a6a 100644
--- a/target/riscv/insn_trans/trans_rvp.c.inc
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -564,3 +564,21 @@  GEN_RVP_R_ACC_OOL(kmmsb);
 GEN_RVP_R_ACC_OOL(kmmsb_u);
 GEN_RVP_R_OOL(kwmmul);
 GEN_RVP_R_OOL(kwmmul_u);
+
+/* Most Significant Word "32x16" Multiply & Add Instructions */
+GEN_RVP_R_OOL(smmwb);
+GEN_RVP_R_OOL(smmwb_u);
+GEN_RVP_R_OOL(smmwt);
+GEN_RVP_R_OOL(smmwt_u);
+GEN_RVP_R_ACC_OOL(kmmawb);
+GEN_RVP_R_ACC_OOL(kmmawb_u);
+GEN_RVP_R_ACC_OOL(kmmawt);
+GEN_RVP_R_ACC_OOL(kmmawt_u);
+GEN_RVP_R_OOL(kmmwb2);
+GEN_RVP_R_OOL(kmmwb2_u);
+GEN_RVP_R_OOL(kmmwt2);
+GEN_RVP_R_OOL(kmmwt2_u);
+GEN_RVP_R_ACC_OOL(kmmawb2);
+GEN_RVP_R_ACC_OOL(kmmawb2_u);
+GEN_RVP_R_ACC_OOL(kmmawt2);
+GEN_RVP_R_ACC_OOL(kmmawt2_u);
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
index c1322d2fac..ea3c9f6dd8 100644
--- a/target/riscv/packed_helper.c
+++ b/target/riscv/packed_helper.c
@@ -1477,3 +1477,211 @@  static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va,
 }
 
 RVPR(kwmmul_u, 1, 4);
+
+/* Most Significant Word "32x16" Multiply & Add Instructions */
+static inline void do_smmwb(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{ /* Element i: d = (a * even halfword of b) >> 16; env is not used here. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* halfword 2 * i: presumably the bottom half of word i */
+    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16; /* 64-bit product */
+}
+
+RVPR(smmwb, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{ /* Element i: like (a * even halfword of b) >> 16, but 2^15 is added first. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned; low 32 bits kept */
+    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16;
+} /* adding half of 2^16 before the shift rounds to nearest; env is unused */
+
+RVPR(smmwb_u, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_smmwt(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{ /* Element i: d = (a * odd halfword of b) >> 16; env is not used here. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* halfword 2 * i + 1: presumably the top half of word i */
+    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16; /* 64-bit product */
+}
+
+RVPR(smmwt, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{ /* Element i: like (a * odd halfword of b) >> 16, but 2^15 is added first. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned; low 32 bits kept */
+    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16;
+} /* adding half of 2^16 before the shift rounds to nearest; env is unused */
+
+RVPR(smmwt_u, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{ /* Element i: d = sadd32(env, 0, (a * even halfword of b) >> 16, c). */
+    int32_t *d = vd, *a = va, *c = vc; /* c[i] is the value being added to */
+    int16_t *b = vb; /* halfword 2 * i: presumably the bottom half of word i */
+    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]);
+} /* NOTE(review): sadd32 is not shown; presumably a saturating 32-bit add */
+
+RVPR_ACC(kmmawb, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */
+
+static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, void *vc, uint8_t i)
+{ /* Element i: as (a * even halfword of b) >> 16 with 2^15 added, then + c. */
+    int32_t *d = vd, *a = va, *c = vc; /* c[i] is the value being added to */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned before the shift */
+    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] +
+                               (1ull << 15)) >> 16, c[H4(i)]);
+} /* NOTE(review): sadd32 is not shown; TODO confirm it takes int32_t args */
+
+RVPR_ACC(kmmawb_u, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */
+
+static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{ /* Element i: d = sadd32(env, 0, (a * odd halfword of b) >> 16, c). */
+    int32_t *d = vd, *a = va, *c = vc; /* c[i] is the value being added to */
+    int16_t *b = vb; /* halfword 2 * i + 1: presumably the top half of word i */
+    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16,
+                      c[H4(i)]);
+} /* NOTE(review): sadd32 is not shown; presumably a saturating 32-bit add */
+
+RVPR_ACC(kmmawt, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */
+
+static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, void *vc, uint8_t i)
+{ /* Element i: as (a * odd halfword of b) >> 16 with 2^15 added, then + c. */
+    int32_t *d = vd, *a = va, *c = vc; /* c[i] is the value being added to */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned before the shift */
+    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] +
+                               (1ull << 15)) >> 16, c[H4(i)]);
+} /* NOTE(review): sadd32 is not shown; TODO confirm it takes int32_t args */
+
+RVPR_ACC(kmmawt_u, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */
+
+static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{ /* Element i: d = (a * even halfword of b) >> 15, clamped to INT32_MAX. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* >> 15 gives the same value as (2 * product) >> 16 */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        d[H4(i)] = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* every other operand pair yields a value that fits */
+        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
+    }
+}
+
+RVPR(kmmwb2, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{ /* Element i: (a * even halfword of b + 2^14) >> 15, clamped to INT32_MAX. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned; low 32 bits kept */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        d[H4(i)] = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* adding half of 2^15 before the shift rounds to nearest */
+        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
+    }
+}
+
+RVPR(kmmwb2_u, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{ /* Element i: d = (a * odd halfword of b) >> 15, clamped to INT32_MAX. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* >> 15 gives the same value as (2 * product) >> 16 */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        d[H4(i)] = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* every other operand pair yields a value that fits */
+        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
+    }
+}
+
+RVPR(kmmwt2, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{ /* Element i: (a * odd halfword of b + 2^14) >> 15, clamped to INT32_MAX. */
+    int32_t *d = vd, *a = va; /* vd (written) and va (read) as int32_t arrays */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned; low 32 bits kept */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        d[H4(i)] = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* adding half of 2^15 before the shift rounds to nearest */
+        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
+    }
+}
+
+RVPR(kmmwt2_u, 1, 4); /* NOTE(review): RVPR, H2, H4 are defined outside this diff */
+
+static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va,
+                              void *vb, void *vc, uint8_t i)
+{ /* Element i: clamp (a * even halfword of b) >> 15, then sadd32 it with c. */
+    int32_t *d = vd, *a = va, *c = vc, result; /* result: the shifted product */
+    int16_t *b = vb; /* >> 15 gives the same value as (2 * product) >> 16 */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        result = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* every other operand pair yields a value that fits */
+        result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); /* sadd32 is not shown here */
+}
+
+RVPR_ACC(kmmawb2, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */
+
+static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va,
+                                void *vb, void *vc, uint8_t i)
+{ /* Element i: clamp (a * even halfword of b + 2^14) >> 15, then add c. */
+    int32_t *d = vd, *a = va, *c = vc, result; /* result: the shifted product */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned; low 32 bits kept */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        result = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* adding half of 2^15 before the shift rounds to nearest */
+        result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); /* sadd32 is not shown here */
+}
+
+RVPR_ACC(kmmawb2_u, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */
+
+static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va,
+                              void *vb, void *vc, uint8_t i)
+{ /* Element i: clamp (a * odd halfword of b) >> 15, then sadd32 it with c. */
+    int32_t *d = vd, *a = va, *c = vc, result; /* result: the shifted product */
+    int16_t *b = vb; /* >> 15 gives the same value as (2 * product) >> 16 */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        result = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* every other operand pair yields a value that fits */
+        result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); /* sadd32 is not shown here */
+}
+
+RVPR_ACC(kmmawt2, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */
+
+static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
+                                void *vb, void *vc, uint8_t i)
+{ /* Element i: clamp (a * odd halfword of b + 2^14) >> 15, then add c. */
+    int32_t *d = vd, *a = va, *c = vc, result; /* result: the shifted product */
+    int16_t *b = vb; /* 1ull below makes the sum unsigned; low 32 bits kept */
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1; /* presumably the saturation flag - TODO confirm */
+        result = INT32_MAX; /* 2^46 >> 15 = 2^31 does not fit in int32_t */
+    } else { /* adding half of 2^15 before the shift rounds to nearest */
+        result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); /* sadd32 is not shown here */
+}
+
+RVPR_ACC(kmmawt2_u, 1, 4); /* NOTE(review): RVPR_ACC is defined outside this diff */