diff mbox series

[RFC,v2,31/44] target/loongarch: Implement vpcnt

Message ID 20230328030631.3117129-32-gaosong@loongson.cn
State New
Headers show
Series Add LoongArch LSX instructions | expand

Commit Message

gaosong March 28, 2023, 3:06 a.m. UTC
This patch includes:
- VPCNT.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/disas.c                    |  5 ++++
 target/loongarch/helper.h                   |  5 ++++
 target/loongarch/insn_trans/trans_lsx.c.inc |  5 ++++
 target/loongarch/insns.decode               |  5 ++++
 target/loongarch/lsx_helper.c               | 30 +++++++++++++++++++++
 5 files changed, 50 insertions(+)

Comments

Richard Henderson April 2, 2023, 3:35 a.m. UTC | #1
On 3/27/23 20:06, Song Gao wrote:
> +static uint64_t do_vpcnt(uint64_t u1)
> +{
> +    u1 = (u1 & 0x5555555555555555ULL) + ((u1 >>  1) & 0x5555555555555555ULL);
> +    u1 = (u1 & 0x3333333333333333ULL) + ((u1 >>  2) & 0x3333333333333333ULL);
> +    u1 = (u1 & 0x0F0F0F0F0F0F0F0FULL) + ((u1 >>  4) & 0x0F0F0F0F0F0F0F0FULL);
> +    u1 = (u1 & 0x00FF00FF00FF00FFULL) + ((u1 >>  8) & 0x00FF00FF00FF00FFULL);
> +    u1 = (u1 & 0x0000FFFF0000FFFFULL) + ((u1 >> 16) & 0x0000FFFF0000FFFFULL);
> +    u1 = (u1 & 0x00000000FFFFFFFFULL) + ((u1 >> 32));
> +
> +    return u1;
> +}
> +
> +#define VPCNT(NAME, BIT, E, T)                                      \
> +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
> +{                                                                   \
> +    int i;                                                          \
> +    VReg *Vd = &(env->fpr[vd].vreg);                                \
> +    VReg *Vj = &(env->fpr[vj].vreg);                                \
> +                                                                    \
> +    for (i = 0; i < LSX_LEN/BIT; i++)                               \
> +    {                                                               \
> +        Vd->E(i) = do_vpcnt((T)Vj->E(i));                           \
> +    }                                                               \
> +}
> +
> +VPCNT(vpcnt_b, 8, B, uint8_t)
> +VPCNT(vpcnt_h, 16, H, uint16_t)
> +VPCNT(vpcnt_w, 32, W, uint32_t)
> +VPCNT(vpcnt_d, 64, D, uint64_t)

host-utils.h has ctpop{8,16,32,64}.


r~
diff mbox series

Patch

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 0c82a1d9d1..0ca51de9d8 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1267,3 +1267,8 @@  INSN_LSX(vclz_b,           vv)
 INSN_LSX(vclz_h,           vv)
 INSN_LSX(vclz_w,           vv)
 INSN_LSX(vclz_d,           vv)
+
+INSN_LSX(vpcnt_b,          vv)
+INSN_LSX(vpcnt_h,          vv)
+INSN_LSX(vpcnt_w,          vv)
+INSN_LSX(vpcnt_d,          vv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index a7facc6bc1..38e310512b 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -495,3 +495,8 @@  DEF_HELPER_3(vclz_b, void, env, i32, i32)
 DEF_HELPER_3(vclz_h, void, env, i32, i32)
 DEF_HELPER_3(vclz_w, void, env, i32, i32)
 DEF_HELPER_3(vclz_d, void, env, i32, i32)
+
+DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 5d81c02103..59923eb1fa 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2794,3 +2794,8 @@  TRANS(vclz_b, gen_vv, gen_helper_vclz_b)
 TRANS(vclz_h, gen_vv, gen_helper_vclz_h)
 TRANS(vclz_w, gen_vv, gen_helper_vclz_w)
 TRANS(vclz_d, gen_vv, gen_helper_vclz_d)
+
+TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b)
+TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h)
+TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w)
+TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7591ec1bab..f865e83da5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -968,3 +968,8 @@  vclz_b           0111 00101001 11000 00100 ..... .....    @vv
 vclz_h           0111 00101001 11000 00101 ..... .....    @vv
 vclz_w           0111 00101001 11000 00110 ..... .....    @vv
 vclz_d           0111 00101001 11000 00111 ..... .....    @vv
+
+vpcnt_b          0111 00101001 11000 01000 ..... .....    @vv
+vpcnt_h          0111 00101001 11000 01001 ..... .....    @vv
+vpcnt_w          0111 00101001 11000 01010 ..... .....    @vv
+vpcnt_d          0111 00101001 11000 01011 ..... .....    @vv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index 8ec479dc2d..94dded7e49 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -2201,3 +2201,33 @@  DO_2OP(vclz_b, 8, B, uint8_t, DO_CLZ_B)
 DO_2OP(vclz_h, 16, H, uint16_t, DO_CLZ_H)
 DO_2OP(vclz_w, 32, W, uint32_t, DO_CLZ_W)
 DO_2OP(vclz_d, 64, D, uint64_t, DO_CLZ_D)
+
+static uint64_t do_vpcnt(uint64_t u1)
+{
+    u1 = (u1 & 0x5555555555555555ULL) + ((u1 >>  1) & 0x5555555555555555ULL);
+    u1 = (u1 & 0x3333333333333333ULL) + ((u1 >>  2) & 0x3333333333333333ULL);
+    u1 = (u1 & 0x0F0F0F0F0F0F0F0FULL) + ((u1 >>  4) & 0x0F0F0F0F0F0F0F0FULL);
+    u1 = (u1 & 0x00FF00FF00FF00FFULL) + ((u1 >>  8) & 0x00FF00FF00FF00FFULL);
+    u1 = (u1 & 0x0000FFFF0000FFFFULL) + ((u1 >> 16) & 0x0000FFFF0000FFFFULL);
+    u1 = (u1 & 0x00000000FFFFFFFFULL) + ((u1 >> 32));
+
+    return u1;
+}
+
+#define VPCNT(NAME, BIT, E, T)                                      \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
+{                                                                   \
+    int i;                                                          \
+    VReg *Vd = &(env->fpr[vd].vreg);                                \
+    VReg *Vj = &(env->fpr[vj].vreg);                                \
+                                                                    \
+    for (i = 0; i < LSX_LEN/BIT; i++)                               \
+    {                                                               \
+        Vd->E(i) = do_vpcnt((T)Vj->E(i));                           \
+    }                                                               \
+}
+
+VPCNT(vpcnt_b, 8, B, uint8_t)
+VPCNT(vpcnt_h, 16, H, uint16_t)
+VPCNT(vpcnt_w, 32, W, uint32_t)
+VPCNT(vpcnt_d, 64, D, uint64_t)