@@ -1267,3 +1267,8 @@ INSN_LSX(vclz_b, vv)
INSN_LSX(vclz_h, vv)
INSN_LSX(vclz_w, vv)
INSN_LSX(vclz_d, vv)
+/* VPCNT.{B/H/W/D}: registered with the same vv operand format as the vclz entries. */
+INSN_LSX(vpcnt_b, vv)
+INSN_LSX(vpcnt_h, vv)
+INSN_LSX(vpcnt_w, vv)
+INSN_LSX(vpcnt_d, vv)
@@ -495,3 +495,8 @@ DEF_HELPER_3(vclz_b, void, env, i32, i32)
DEF_HELPER_3(vclz_h, void, env, i32, i32)
DEF_HELPER_3(vclz_w, void, env, i32, i32)
DEF_HELPER_3(vclz_d, void, env, i32, i32)
+/* VPCNT.{B/H/W/D} helper declarations: void return; arguments are env and two i32 values. */
+DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
@@ -2794,3 +2794,8 @@ TRANS(vclz_b, gen_vv, gen_helper_vclz_b)
TRANS(vclz_h, gen_vv, gen_helper_vclz_h)
TRANS(vclz_w, gen_vv, gen_helper_vclz_w)
TRANS(vclz_d, gen_vv, gen_helper_vclz_d)
+/* VPCNT.{B/H/W/D}: each is translated through gen_vv with its matching gen_helper_vpcnt_* helper. */
+TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b)
+TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h)
+TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w)
+TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d)
@@ -968,3 +968,8 @@ vclz_b 0111 00101001 11000 00100 ..... ..... @vv
vclz_h 0111 00101001 11000 00101 ..... ..... @vv
vclz_w 0111 00101001 11000 00110 ..... ..... @vv
vclz_d 0111 00101001 11000 00111 ..... ..... @vv
+# VPCNT.{B/H/W/D}: same @vv format as vclz; the last fixed 5-bit field runs 01000..01011.
+vpcnt_b 0111 00101001 11000 01000 ..... ..... @vv
+vpcnt_h 0111 00101001 11000 01001 ..... ..... @vv
+vpcnt_w 0111 00101001 11000 01010 ..... ..... @vv
+vpcnt_d 0111 00101001 11000 01011 ..... ..... @vv
@@ -2201,3 +2201,33 @@ DO_2OP(vclz_b, 8, B, uint8_t, DO_CLZ_B)
DO_2OP(vclz_h, 16, H, uint16_t, DO_CLZ_H)
DO_2OP(vclz_w, 32, W, uint32_t, DO_CLZ_W)
DO_2OP(vclz_d, 64, D, uint64_t, DO_CLZ_D)
+/* Return the number of set bits in u1, by summing adjacent bit fields of doubling width. */
+static uint64_t do_vpcnt(uint64_t u1)
+{
+ u1 = (u1 & 0x5555555555555555ULL) + ((u1 >> 1) & 0x5555555555555555ULL); /* counts per 2-bit field */
+ u1 = (u1 & 0x3333333333333333ULL) + ((u1 >> 2) & 0x3333333333333333ULL); /* counts per 4-bit field */
+ u1 = (u1 & 0x0F0F0F0F0F0F0F0FULL) + ((u1 >> 4) & 0x0F0F0F0F0F0F0F0FULL); /* counts per byte */
+ u1 = (u1 & 0x00FF00FF00FF00FFULL) + ((u1 >> 8) & 0x00FF00FF00FF00FFULL); /* counts per 16-bit field */
+ u1 = (u1 & 0x0000FFFF0000FFFFULL) + ((u1 >> 16) & 0x0000FFFF0000FFFFULL); /* counts per 32-bit field */
+ u1 = (u1 & 0x00000000FFFFFFFFULL) + ((u1 >> 32)); /* low half + high half = total for all 64 bits */
+
+ return u1;
+}
+/* VPCNT(NAME, BIT, E, T): define HELPER(NAME), which stores do_vpcnt() of each E element of register vj into register vd. */
+#define VPCNT(NAME, BIT, E, T) \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
+{ \
+ int i; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ /* One iteration per BIT-wide element; the (T) cast is applied before widening to uint64_t (presumably to avoid sign extension -- accessor types not visible here, TODO confirm). */ \
+ for (i = 0; i < LSX_LEN/BIT; i++) \
+ { \
+ Vd->E(i) = do_vpcnt((T)Vj->E(i)); \
+ } \
+}
+/* Instantiate the vpcnt helpers for 8-, 16-, 32- and 64-bit elements (B/H/W/D accessors). */
+VPCNT(vpcnt_b, 8, B, uint8_t)
+VPCNT(vpcnt_h, 16, H, uint16_t)
+VPCNT(vpcnt_w, 32, W, uint32_t)
+VPCNT(vpcnt_d, 64, D, uint64_t)
This patch includes:
- VPCNT.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/disas.c                    |  5 ++++
 target/loongarch/helper.h                   |  5 ++++
 target/loongarch/insn_trans/trans_lsx.c.inc |  5 ++++
 target/loongarch/insns.decode               |  5 ++++
 target/loongarch/lsx_helper.c               | 30 +++++++++++++++++++++
 5 files changed, 50 insertions(+)