Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.2/patches/2224365/?format=api
{ "id": 2224365, "url": "http://patchwork.ozlabs.org/api/1.2/patches/2224365/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260417104652.17857-14-xiaoou@iscas.ac.cn/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/1.2/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260417104652.17857-14-xiaoou@iscas.ac.cn>", "list_archive_url": null, "date": "2026-04-17T10:46:50", "name": "[13/14] target/riscv: rvp: add rv32-only register-pair instructions", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "9d88b6df697524296578eedb86eee84a916e2924", "submitter": { "id": 89843, "url": "http://patchwork.ozlabs.org/api/1.2/people/89843/?format=api", "name": "Molly Chen", "email": "xiaoou@iscas.ac.cn" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260417104652.17857-14-xiaoou@iscas.ac.cn/mbox/", "series": [ { "id": 500307, "url": "http://patchwork.ozlabs.org/api/1.2/series/500307/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=500307", "date": "2026-04-17T10:46:37", "name": "target/riscv: add support for RISC-V P extension (v0.20 draft)", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/500307/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2224365/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2224365/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists1p.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=patchwork.ozlabs.org)", "Received": [ "from lists1p.gnu.org (lists1p.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fxs7b16P7z1yCv\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 17 Apr 2026 20:48:59 +1000 (AEST)", "from localhost ([::1] helo=lists1p.gnu.org)\n\tby lists1p.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces@nongnu.org>)\n\tid 1wDgjs-0001Xw-9Y; Fri, 17 Apr 2026 06:47:56 -0400", "from eggs.gnu.org ([2001:470:142:3::10])\n by lists1p.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <xiaoou@iscas.ac.cn>)\n id 1wDgjp-0001Uw-9F; Fri, 17 Apr 2026 06:47:53 -0400", "from smtp21.cstnet.cn ([159.226.251.21] helo=cstnet.cn)\n by eggs.gnu.org with esmtps (TLS1.2:DHE_RSA_AES_256_CBC_SHA1:256)\n (Exim 4.90_1) (envelope-from <xiaoou@iscas.ac.cn>)\n id 1wDgjj-00083b-QA; Fri, 17 Apr 2026 06:47:52 -0400", "from Huawei.localdomain (unknown [36.110.52.2])\n by APP-01 (Coremail) with SMTP id qwCowAB3H2ulD+JpLDmSDQ--.804S15;\n Fri, 17 Apr 2026 18:47:23 +0800 (CST)" ], "From": "Molly Chen <xiaoou@iscas.ac.cn>", "To": "palmer@dabbelt.com, alistair.francis@wdc.com, liwei1518@gmail.com,\n daniel.barboza@oss.qualcomm.com, zhiwei_liu@linux.alibaba.com,\n chao.liu.zevorn@gmail.com", "Cc": "xiaoou@iscas.ac.cn,\n\tqemu-riscv@nongnu.org,\n\tqemu-devel@nongnu.org", "Subject": "[PATCH 13/14] target/riscv: rvp: add rv32-only register-pair\n instructions", "Date": "Fri, 17 Apr 2026 18:46:50 +0800", "Message-Id": "<20260417104652.17857-14-xiaoou@iscas.ac.cn>", "X-Mailer": "git-send-email 2.34.1", "In-Reply-To": "<20260417104652.17857-1-xiaoou@iscas.ac.cn>", "References": "<20260417104652.17857-1-xiaoou@iscas.ac.cn>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-CM-TRANSID": "qwCowAB3H2ulD+JpLDmSDQ--.804S15", "X-Coremail-Antispam": "1UD129KBjvAXoWDKw1rGFWfGrykAFy3Kr1DGFg_yoWfXr48to\n W5Gw15Ar97GrW7ua4akw4UXFy7Zry2vwn3Jr45Zr47uayfGr47KFn8Jrn5Zay8JrWFkFWf\n XFZ3Grn5tr1a934Dn29KB7ZKAUJUUUU8529EdanIXcx71UUUUU7v73VFW2AGmfu7bjvjm3\n AaLaJ3UjIYCTnIWjp_UUUY37AC8VAFwI0_Wr0E3s1l1xkIjI8I6I8E6xAIw20EY4v20xva\n j40_Wr0E3s1l1IIY67AEw4v_Jr0_Jr4l82xGYIkIc2x26280x7IE14v26r126s0DM28Irc\n Ia0xkI8VCY1x0267AKxVW5JVCq3wA2ocxC64kIII0Yj41l84x0c7CEw4AK67xGY2AK021l\n 84ACjcxK6xIIjxv20xvE14v26ryj6F1UM28EF7xvwVC0I7IYx2IY6xkF7I0E14v26r4UJV\n WxJr1l84ACjcxK6I8E87Iv67AKxVW0oVCq3wA2z4x0Y4vEx4A2jsIEc7CjxVAFwI0_GcCE\n 3s1le2I262IYc4CY6c8Ij28IcVAaY2xG8wAqx4xG64xvF2IEw4CE5I8CrVC2j2WlYx0E2I\n x0cI8IcVAFwI0_Jrv_JF1lYx0Ex4A2jsIE14v26r4j6F4UMcvjeVCFs4IE7xkEbVWUJVW8\n JwACjI8F5VA0II8E6IAqYI8I648v4I1lc7CjxVAaw2AFwI0_Jw0_GFyl4I8I3I0E4IkC6x\n 0Yz7v_Jr0_Gr1lx2IqxVAqx4xG67AKxVWUJVWUGwC20s026x8GjcxK67AKxVWUGVWUWwC2\n zVAF1VAY17CE14v26r1q6r43MIIF0xvE2Ix0cI8IcVAFwI0_Gr0_Xr1lIxAIcVC0I7IYx2\n IY6xkF7I0E14v26r4UJVWxJr1lIxAIcVCF04k26cxKx2IYs7xG6r1j6r1xMIIF0xvEx4A2\n jsIE14v26r4j6F4UMIIF0xvEx4A2jsIEc7CjxVAFwI0_Gr1j6F4UJbIYCTnIWIevJa73Uj\n IFyTuYvjfU5TmhDUUUU", "X-Originating-IP": "[36.110.52.2]", "X-CM-SenderInfo": "50ld003x6l2u1dvotugofq/", "Received-SPF": "pass client-ip=159.226.251.21; envelope-from=xiaoou@iscas.ac.cn;\n helo=cstnet.cn", "X-Spam_score_int": "-21", "X-Spam_score": "-2.2", "X-Spam_bar": "--", "X-Spam_report": "(-2.2 / 5.0 requ) BAYES_00=-1.9, HK_RANDOM_ENVFROM=0.998,\n HK_RANDOM_FROM=0.998, RCVD_IN_DNSWL_MED=-2.3,\n RCVD_IN_VALIDITY_RPBL_BLOCKED=0.001, RCVD_IN_VALIDITY_SAFE_BLOCKED=0.001,\n SPF_HELO_PASS=-0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no", "X-Spam_action": "no action", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "qemu development <qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org" }, "content": "Signed-off-by: Molly Chen <xiaoou@iscas.ac.cn>\n---\n target/riscv/helper.h | 131 ++\n target/riscv/insn32.decode | 279 +++\n target/riscv/insn_trans/trans_rvp.c.inc | 786 ++++++++-\n target/riscv/psimd_helper.c | 2068 +++++++++++++++++++++++\n 4 files changed, 3220 insertions(+), 44 deletions(-)", "diff": "diff --git a/target/riscv/helper.h b/target/riscv/helper.h\nindex 663ac0e242..85d4fe1b67 100644\n--- a/target/riscv/helper.h\n+++ b/target/riscv/helper.h\n@@ -1737,3 +1737,134 @@ DEF_HELPER_3(pm4addu_h, i64, env, i64, i64)\n DEF_HELPER_4(pm4adda_h, i64, env, i64, i64, i64)\n DEF_HELPER_4(pm4addasu_h, i64, env, i64, i64, i64)\n DEF_HELPER_4(pm4addau_h, i64, env, i64, i64, i64)\n+\n+/* Packed SIMD - Double-Width Operations (RV32 only, register pairs) */\n+DEF_HELPER_3(pwadd_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwadda_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwaddu_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwaddau_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsub_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwsuba_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsubu_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwsubau_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwslli_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwsll_bs, i64, env, i32, i32)\n+DEF_HELPER_3(pwslai_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwsla_bs, i64, env, i32, i32)\n+\n+DEF_HELPER_3(pwadd_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwadda_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwaddu_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwaddau_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsub_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwsuba_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsubu_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwsubau_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwslli_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwsll_hs, i64, env, i32, i32)\n+DEF_HELPER_3(pwslai_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwsla_hs, i64, env, i32, i32)\n+\n+DEF_HELPER_3(wadd, i64, env, i32, i32)\n+DEF_HELPER_4(wadda, i64, env, i32, i32, i64)\n+DEF_HELPER_3(waddu, i64, env, i32, i32)\n+DEF_HELPER_4(waddau, i64, env, i32, i32, i64)\n+DEF_HELPER_3(wsub, i64, env, i32, i32)\n+DEF_HELPER_4(wsuba, i64, env, i32, i32, i64)\n+DEF_HELPER_3(wsubu, i64, env, i32, i32)\n+DEF_HELPER_4(wsubau, i64, env, i32, i32, i64)\n+DEF_HELPER_3(wslli, i64, env, i32, i32)\n+DEF_HELPER_3(wsll, i64, env, i32, i32)\n+DEF_HELPER_3(wslai, i64, env, i32, i32)\n+DEF_HELPER_3(wsla, i64, env, i32, i32)\n+\n+DEF_HELPER_3(wzip8p, i64, env, i32, i32)\n+DEF_HELPER_3(wzip16p, i64, env, i32, i32)\n+\n+DEF_HELPER_4(predsum_dbs, i32, env, i32, i32, i32)\n+DEF_HELPER_4(predsumu_dbs, i32, env, i32, i32, i32)\n+DEF_HELPER_4(predsum_dhs, i32, env, i32, i32, i32)\n+DEF_HELPER_4(predsumu_dhs, i32, env, i32, i32, i32)\n+\n+DEF_HELPER_3(pnsrli_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrai_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrari_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipi_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipri_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipiu_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipriu_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrl_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsra_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrar_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclip_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipr_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipu_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipru_bs, i32, env, i64, i32)\n+\n+DEF_HELPER_3(pnsrli_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrai_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrari_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipi_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipri_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipiu_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipriu_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrl_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsra_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrar_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclip_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipr_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipu_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipru_hs, i32, env, i64, i32)\n+\n+DEF_HELPER_3(nsrli, i32, env, i64, i32)\n+DEF_HELPER_3(nsrai, i32, env, i64, i32)\n+DEF_HELPER_3(nsrari, i32, env, i64, i32)\n+DEF_HELPER_3(nclipi, i32, env, i64, i32)\n+DEF_HELPER_3(nclipri, i32, env, i64, i32)\n+DEF_HELPER_3(nclipiu, i32, env, i64, i32)\n+DEF_HELPER_3(nclipriu, i32, env, i64, i32)\n+DEF_HELPER_3(nsrl, i32, env, i64, i32)\n+DEF_HELPER_3(nsra, i32, env, i64, i32)\n+DEF_HELPER_3(nsrar, i32, env, i64, i32)\n+DEF_HELPER_3(nclip, i32, env, i64, i32)\n+DEF_HELPER_3(nclipr, i32, env, i64, i32)\n+DEF_HELPER_3(nclipu, i32, env, i64, i32)\n+DEF_HELPER_3(nclipru, i32, env, i64, i32)\n+\n+DEF_HELPER_4(pmqwacc_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pmqrwacc_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(mqwacc, i64, env, i32, i32, i64)\n+DEF_HELPER_4(mqrwacc, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(pwmul_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulsu_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulu_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwmul_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulsu_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulu_h, i64, env, i32, i32)\n+\n+DEF_HELPER_4(pwmacc_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pwmaccsu_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pwmaccu_h, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(wmul, i64, env, i32, i32)\n+DEF_HELPER_3(wmulsu, i64, env, i32, i32)\n+DEF_HELPER_3(wmulu, i64, env, i32, i32)\n+\n+DEF_HELPER_4(wmacc, i64, env, i32, i32, i64)\n+DEF_HELPER_4(wmaccsu, i64, env, i32, i32, i64)\n+DEF_HELPER_4(wmaccu, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(pm2wadd_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2waddsu_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2waddu_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2wadd_hx, i64, env, i32, i32)\n+DEF_HELPER_4(pm2wadda_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2waddasu_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2waddau_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2wadda_hx, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(pm2wsub_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2wsub_hx, i64, env, i32, i32)\n+DEF_HELPER_4(pm2wsuba_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2wsuba_hx, i64, env, i32, i32, i64)\ndiff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode\nindex b1bde37de4..7be0b9e5e6 100644\n--- a/target/riscv/insn32.decode\n+++ b/target/riscv/insn32.decode\n@@ -23,6 +23,9 @@\n %rd 7:5\n %sh5 20:5\n %sh6 20:6\n+%rs2_p 21:4\n+%rs1_p 16:4\n+%rd_p 8:4\n \n %sh7 20:7\n %csr 20:12\n@@ -69,6 +72,7 @@\n &mop5 imm rd rs1\n &mop3 imm rd rs1 rs2\n &p_l imm rd\n+&p_ui imm rs1 rd\n \n # Formats 32:\n @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd\n@@ -101,6 +105,11 @@\n @r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd\n @r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd\n @r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1\n+@r_p_1 ....... ..... ..... ... ..... ....... &r %rs2 %rs1 rd=%rd_p\n+@r_p_2 ....... ..... ..... ... ..... ....... &r rs2=%rs2_p rs1=%rs1_p rd=%rd_p\n+@r_p_3 ....... ..... ..... ... ..... ....... &r %rs2 rs1=%rs1_p rd=%rd_p\n+@r_p_4 ....... ..... ..... ... ..... ....... &r %rs2 rs1=%rs1_p %rd\n+@r2_p ....... ..... ..... ... ..... ....... &r2 rs1=%rs1_p rd=%rd_p\n \n @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1\n @hfence_vvma ....... ..... ..... ... ..... ....... %rs2 %rs1\n@@ -122,6 +131,18 @@\n @p_l2 ....... .......... ... ..... ....... &p_l imm=%imm_p_l2 %rd\n @p_l3 ....... .......... ... ..... ....... &p_l imm=%imm_p_l3 %rd\n @p_l4 ....... .......... ... ..... ....... &p_l imm=%imm_p_l4 %rd\n+@p_l1_p ........ ........ .... ..... ....... &p_l imm=%imm_p_l1 rd=%rd_p\n+@p_l2_p ........ ........ .... ..... ....... &p_l imm=%imm_p_l2 rd=%rd_p\n+@p_l3_p ....... .......... ... ..... ....... &p_l imm=%imm_p_l3 rd=%rd_p\n+@p_ui8_p ..... .... ... ..... ... ..... ....... &i imm=%imm_p_ui8 rs1=%rs1_p rd=%rd_p\n+@p_ui16_p ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui16 %rs1 rd=%rd_p\n+@p_ui16_p_2 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui16 rs1=%rs1_p rd=%rd_p\n+@p_ui16_p_3 ..... .... ... .... .... ..... ....... &p_ui imm=%imm_p_ui16 rs1=%rs1_p %rd\n+@p_ui32_p ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui32 %rs1 rd=%rd_p\n+@p_ui32_p_2 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui32 rs1=%rs1_p rd=%rd_p\n+@p_ui32_p_3 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui32 rs1=%rs1_p %rd\n+@p_ui64_p ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui64 %rs1 rd=%rd_p\n+@p_ui64_p_2 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui64 rs1=%rs1_p %rd\n \n # Formats 64:\n @sh5 ....... ..... ..... ... ..... ....... &shift shamt=%sh5 %rs1 %rd\n@@ -1612,3 +1633,261 @@ pli_h 1011000 .......... 010 ..... 0011011 @p_l2\n plui_h 1111000 .......... 010 ..... 0011011 @p_l3\n pli_w 1011001 ..... ..... 010 ..... 0011011 @p_l2\n plui_w 1111001 ..... ..... 010 ..... 0011011 @p_l4\n+\n+# Packed SIMD - Double-Width Operations (RV32 only, register pairs)\n+# register-pair destination\n+pwadd_b 0000010 ..... ..... 010 .... 10011011 @r_p_1\n+pwadda_b 0000110 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddu_b 0001010 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddau_b 0001110 ..... ..... 010 .... 10011011 @r_p_1\n+pwsub_b 0100010 ..... ..... 010 .... 10011011 @r_p_1\n+pwsuba_b 0100110 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubu_b 0101010 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubau_b 0101110 ..... ..... 010 .... 10011011 @r_p_1\n+pwslli_b 00000 001.... ..... 010 .... 00011011 @p_ui16_p\n+pwsll_bs 0000100 ..... ..... 010 .... 00011011 @r_p_1\n+pwslai_b 01000 001.... ..... 010 .... 00011011 @p_ui16_p\n+pwsla_bs 0100100 ..... ..... 010 .... 00011011 @r_p_1\n+\n+pwadd_h 0000000 ..... ..... 010 .... 10011011 @r_p_1\n+pwadda_h 0000100 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddu_h 0001000 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddau_h 0001100 ..... ..... 010 .... 10011011 @r_p_1\n+pwsub_h 0100000 ..... ..... 010 .... 10011011 @r_p_1\n+pwsuba_h 0100100 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubu_h 0101000 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubau_h 0101100 ..... ..... 010 .... 10011011 @r_p_1\n+pwslli_h 00000 01..... ..... 010 .... 00011011 @p_ui32_p\n+pwsll_hs 0000101 ..... ..... 010 .... 00011011 @r_p_1\n+pwslai_h 01000 01..... ..... 010 .... 00011011 @p_ui32_p\n+pwsla_hs 0100101 ..... ..... 010 .... 00011011 @r_p_1\n+\n+wadd 0000001 ..... ..... 010 .... 10011011 @r_p_1\n+wadda 0000101 ..... ..... 010 .... 10011011 @r_p_1\n+waddu 0001001 ..... ..... 010 .... 10011011 @r_p_1\n+waddau 0001101 ..... ..... 010 .... 10011011 @r_p_1\n+wsub 0100001 ..... ..... 010 .... 10011011 @r_p_1\n+wsuba 0100101 ..... ..... 010 .... 10011011 @r_p_1\n+wsubu 0101001 ..... ..... 010 .... 10011011 @r_p_1\n+wsubau 0101101 ..... ..... 010 .... 10011011 @r_p_1\n+wslli 00000 1...... ..... 010 .... 00011011 @p_ui64_p\n+wsll 0000111 ..... ..... 010 .... 00011011 @r_p_1\n+wslai 01000 1...... ..... 010 .... 00011011 @p_ui64_p\n+wsla 0100111 ..... ..... 010 .... 00011011 @r_p_1\n+\n+wzip8p 0111100 ..... ..... 010 .... 00011011 @r_p_1\n+wzip16p 0111101 ..... ..... 010 .... 00011011 @r_p_1\n+\n+#register-pair operands\n+pli_db 00110100 ........ 0010 .... 00011011 @p_l1_p\n+padd_db 1000010 .... 0 .... 0110 .... 00011011 @r_p_2\n+psub_db 1100010 .... 0 .... 0110 .... 00011011 @r_p_2\n+psadd_db 1001010 .... 0 .... 0110 .... 00011011 @r_p_2\n+psaddu_db 1011010 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssub_db 1101010 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssubu_db 1111010 .... 0 .... 0110 .... 00011011 @r_p_2\n+paadd_db 1001110 .... 0 .... 0110 .... 00011011 @r_p_2\n+paaddu_db 1011110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasub_db 1101110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasubu_db 1111110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pabd_db 1100110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pabdu_db 1110110 .... 0 .... 0110 .... 00011011 @r_p_2\n+psabs_db 0110010 00111 .... 0110 .... 00011011 @r2_p\n+pli_dh 0011000 .......... 010 .... 00011011 @p_l2_p\n+plui_dh 0111000 .......... 010 .... 00011011 @p_l3_p\n+padd_dh 1000000 .... 0 .... 0110 .... 00011011 @r_p_2\n+psub_dh 1100000 .... 0 .... 0110 .... 00011011 @r_p_2\n+psadd_dh 1001000 .... 0 .... 0110 .... 00011011 @r_p_2\n+psaddu_dh 1011000 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssub_dh 1101000 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssubu_dh 1111000 .... 0 .... 0110 .... 00011011 @r_p_2\n+paadd_dh 1001100 .... 0 .... 0110 .... 00011011 @r_p_2\n+paaddu_dh 1011100 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasub_dh 1101100 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasubu_dh 1111100 .... 0 .... 0110 .... 00011011 @r_p_2\n+psh1add_dh 1010000 .... 1 .... 0110 .... 00011011 @r_p_2\n+pssh1sadd_dh 1011000 .... 1 .... 0110 .... 00011011 @r_p_2\n+pas_dhx 1000000 .... 1 .... 1110 .... 00011011 @r_p_2\n+psa_dhx 1000010 .... 1 .... 1110 .... 00011011 @r_p_2\n+psas_dhx 1001000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pssa_dhx 1001010 .... 1 .... 1110 .... 00011011 @r_p_2\n+paas_dhx 1001100 .... 1 .... 1110 .... 00011011 @r_p_2\n+pasa_dhx 1001110 .... 1 .... 1110 .... 00011011 @r_p_2\n+pabd_dh 1100100 .... 0 .... 0110 .... 00011011 @r_p_2\n+pabdu_dh 1110100 .... 0 .... 0110 .... 00011011 @r_p_2\n+psabs_dh 0110000 00111 .... 0110 .... 00011011 @r2_p\n+padd_dw 1000001 .... 0 .... 0110 .... 00011011 @r_p_2\n+psub_dw 1100001 .... 0 .... 0110 .... 00011011 @r_p_2\n+psadd_dw 1001001 .... 0 .... 0110 .... 00011011 @r_p_2\n+psaddu_dw 1011001 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssub_dw 1101001 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssubu_dw 1111001 .... 0 .... 0110 .... 00011011 @r_p_2\n+paadd_dw 1001101 .... 0 .... 0110 .... 00011011 @r_p_2\n+paaddu_dw 1011101 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasub_dw 1101101 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasubu_dw 1111101 .... 0 .... 0110 .... 00011011 @r_p_2\n+psh1add_dw 1010001 .... 1 .... 0110 .... 00011011 @r_p_2\n+pssh1sadd_dw 1011001 .... 1 .... 0110 .... 00011011 @r_p_2\n+addd_p 1000011 .... 0 .... 0110 .... 00011011 @r_p_2\n+subd_p 1100011 .... 0 .... 0110 .... 00011011 @r_p_2\n+\n+# register-pair first source only\n+predsum_dbs 0001110 ..... .... 0100 ..... 0011011 @r_p_4\n+predsumu_dbs 0011110 ..... .... 0100 ..... 0011011 @r_p_4\n+predsum_dhs 0001100 ..... .... 0100 ..... 0011011 @r_p_4\n+predsumu_dhs 0011100 ..... .... 0100 ..... 0011011 @r_p_4\n+\n+# register-pair operands\n+pslli_db 00000 0001... .... 0110 .... 00011011 @p_ui8_p\n+psrli_db 00000 0001... .... 1110 .... 00011011 @p_ui8_p\n+psrai_db 01000 0001... .... 1110 .... 00011011 @p_ui8_p\n+pmin_db 1110010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pminu_db 1110110 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmax_db 1111010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmaxu_db 1111110 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmseq_db 1100010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmslt_db 1101010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmsltu_db 1101110 .... 1 .... 1110 .... 00011011 @r_p_2\n+psext_dh_b 0110000 00100 .... 0110 .... 00011011 @r2_p\n+psati_dh 01100 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+pusati_dh 00100 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+pslli_dh 00000 001.... .... 0110 .... 00011011 @p_ui16_p_2\n+psrli_dh 00000 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+psrai_dh 01000 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+psslai_dh 01010 001.... .... 0110 .... 00011011 @p_ui16_p_2\n+psrari_dh 01010 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+pmin_dh 1110000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pminu_dh 1110100 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmax_dh 1111000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmaxu_dh 1111100 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmseq_dh 1100000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmslt_dh 1101000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmsltu_dh 1101100 .... 1 .... 1110 .... 00011011 @r_p_2\n+psext_dw_b 0110001 00100 .... 0110 .... 00011011 @r2_p\n+psext_dw_h 0110001 00101 .... 0110 .... 00011011 @r2_p\n+psati_dw 01100 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+pusati_dw 00100 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+pslli_dw 00000 01..... .... 0110 .... 00011011 @p_ui32_p_2\n+psrli_dw 00000 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+psrai_dw 01000 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+psslai_dw 01010 01..... .... 0110 .... 00011011 @p_ui32_p_2\n+psrari_dw 01010 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+pmin_dw 1110001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pminu_dw 1110101 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmax_dw 1111001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmaxu_dw 1111101 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmseq_dw 1100001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmslt_dw 1101001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmsltu_dw 1101101 .... 1 .... 1110 .... 00011011 @r_p_2\n+\n+# register-pair first source and dest\n+padd_dbs 0001110 ..... .... 0110 .... 00011011 @r_p_3\n+psll_dbs 0000110 ..... .... 0110 .... 00011011 @r_p_3\n+psra_dbs 0100110 ..... .... 1110 .... 00011011 @r_p_3\n+padd_dhs 0001100 ..... .... 0110 .... 00011011 @r_p_3\n+psll_dhs 0000100 ..... .... 0110 .... 00011011 @r_p_3\n+psrl_dhs 0000100 ..... .... 1110 .... 00011011 @r_p_3\n+psra_dhs 0100100 ..... .... 1110 .... 00011011 @r_p_3\n+pssha_dhs 0110100 ..... .... 0110 .... 00011011 @r_p_3\n+psshar_dhs 0111100 ..... .... 0110 .... 00011011 @r_p_3\n+padd_dws 0001101 ..... .... 0110 .... 00011011 @r_p_3\n+psll_dws 0000101 ..... .... 0110 .... 00011011 @r_p_3\n+psrl_dws 0000101 ..... .... 1110 .... 00011011 @r_p_3\n+psra_dws 0100101 ..... .... 1110 .... 00011011 @r_p_3\n+pssha_dws 0110101 ..... .... 0110 .... 00011011 @r_p_3\n+psshar_dws 0111101 ..... .... 0110 .... 00011011 @r_p_3\n+\n+# register-pair operands\n+ppaire_db 1000000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppaireo_db 1001000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairoe_db 1010000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairo_db 1011000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppaire_dh 1000001 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppaireo_dh 1001001 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairoe_dh 1010001 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairo_dh 1011001 .... 0 .... 1110 .... 00011011 @r_p_2\n+\n+#register-pair first source only\n+pnsrli_b 00000 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnsrai_b 01000 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnsrari_b 01010 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipi_b 01100 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipri_b 01110 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipiu_b 00100 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipriu_b 00110 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnsrl_bs 00001 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsra_bs 01001 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsrar_bs 01011 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclip_bs 01101 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipr_bs 01111 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipu_bs 00101 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipru_bs 00111 00 ..... .... 1100 ..... 0011011 @r_p_4\n+\n+pnsrli_h 00000 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnsrai_h 01000 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnsrari_h 01010 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipi_h 01100 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipri_h 01110 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipiu_h 00100 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipriu_h 00110 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnsrl_hs 00001 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsra_hs 01001 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsrar_hs 01011 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclip_hs 01101 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipr_hs 01111 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipu_hs 00101 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipru_hs 00111 01 ..... .... 1100 ..... 0011011 @r_p_4\n+\n+nsrli 00000 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nsrai 01000 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nsrari 01010 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipi 01100 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipri 01110 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipiu 00100 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipriu 00110 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nsrl 00001 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nsra 01001 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nsrar 01011 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclip 01101 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclipr 01111 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclipu 00101 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclipru 00111 11 ..... .... 1100 ..... 0011011 @r_p_4\n+\n+# register-pair multiply\n+pmqwacc_h 01111 00 ..... ..... 010 .... 10011011 @r_p_1\n+pmqrwacc_h 01111 10 ..... ..... 010 .... 10011011 @r_p_1\n+mqwacc 01111 01 ..... ..... 010 .... 10011011 @r_p_1\n+mqrwacc 01111 11 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pwmul_b 00100 10 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulsu_b 01100 10 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulu_b 00110 10 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pwmul_h 00100 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulsu_h 01100 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulu_h 00110 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmacc_h 00101 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmaccsu_h 01101 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmaccu_h 00111 00 ..... ..... 010 .... 10011011 @r_p_1\n+\n+wmul 00100 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmulsu 01100 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmulu 00110 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmacc 00101 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmaccsu 01101 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmaccu 00111 01 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pm2wadd_h 00000 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddsu_h 01100 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddu_h 00100 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wadd_hx 00010 11 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pm2wadda_h 00001 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddasu_h 01101 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddau_h 00101 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wadda_hx 00011 11 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pm2wsub_h 01000 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wsub_hx 01010 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wsuba_h 01001 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wsuba_hx 01011 11 ..... ..... 010 .... 10011011 @r_p_1\ndiff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc\nindex b82774e00f..ca459293a3 100644\n--- a/target/riscv/insn_trans/trans_rvp.c.inc\n+++ b/target/riscv/insn_trans/trans_rvp.c.inc\n@@ -2,6 +2,38 @@\n /* RISC-V translation routines for the P Standard Extensions. */\n /* Copyright (c) 2026 ISRC ISCAS. */\n \n+/* Save a 64 bit data in src to dst and dst + 1 */\n+static void set_pair_regs(DisasContext *ctx, int dst, TCGv_i64 src)\n+{\n+#if defined(TARGET_RISCV32)\n+ TCGv_i64 tl_64 = tcg_temp_new_i64();\n+ TCGv_i64 th_64 = tcg_temp_new_i64();\n+ TCGv_i32 tl_32 = tcg_temp_new_i32();\n+ TCGv_i32 th_32 = tcg_temp_new_i32();\n+ tcg_gen_extract_i64(tl_64, src, 0, 32);\n+ tcg_gen_extract_i64(th_64, src, 32, 32);\n+ tcg_gen_trunc_i64_tl(tl_32, tl_64);\n+ tcg_gen_trunc_i64_tl(th_32, th_64);\n+ gen_set_gpr(ctx, dst, tl_32);\n+ gen_set_gpr(ctx, dst + 1, th_32);\n+# else\n+ gen_set_gpr(ctx, dst, src);\n+#endif\n+}\n+\n+/* Concat two 32 bit data in src and src + 1 to dst */\n+static void get_pair_regs(DisasContext *ctx, TCGv_i64 dst, int src)\n+{\n+#if defined(TARGET_RISCV32)\n+ TCGv t1 = get_gpr(ctx, src, EXT_NONE);\n+ TCGv t2 = get_gpr(ctx, src + 1, EXT_NONE);\n+ tcg_gen_concat_i32_i64(dst, t1, t2);\n+#else\n+ TCGv t1 = get_gpr(ctx, src, EXT_NONE);\n+ tcg_gen_mov_tl(dst, t1);\n+#endif\n+}\n+\n #define GEN_SIMD_TRANS(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n@@ -10,7 +42,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE); \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1, src2); \\\n- return true; \\\n+ return true; \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -23,14 +55,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE); \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1, src2); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_32(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -39,7 +71,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_64BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_64(NAME) \\\n@@ -51,7 +83,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE); \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1, src2); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -65,7 +97,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv t = tcg_temp_new(); \\\n gen_helper_##NAME(t, tcg_env, src1, src2, dest); \\\n gen_set_gpr(ctx, a->rd, t); \\\n- return true; \\\n+ return true; \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -80,14 +112,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv t = tcg_temp_new(); \\\n gen_helper_##NAME(t, tcg_env, src1, src2, dest); \\\n gen_set_gpr(ctx, a->rd, t); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_ACC_32(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -96,7 +128,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_64BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_ACC_64(NAME) \\\n@@ -110,7 +142,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv t = tcg_temp_new(); \\\n gen_helper_##NAME(t, tcg_env, src1, src2, dest); \\\n gen_set_gpr(ctx, a->rd, t); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -122,7 +154,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1); \\\n gen_set_gpr(ctx, a->rd, dest); \\\n- return true; \\\n+ return true; \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -130,7 +162,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_64BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_R1_64(NAME) \\\n@@ -141,7 +173,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE); \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -153,7 +185,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv imm = tcg_constant_tl(a->imm); \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1, imm); \\\n- return true; \\\n+ return true; \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -166,14 +198,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv imm = tcg_constant_tl(a->imm); \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1, imm); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_IMM_32(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -182,7 +214,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_64BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_IMM_64(NAME) \\\n@@ -194,7 +226,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv imm = tcg_constant_tl(a->imm); \\\n TCGv dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, src1, imm); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -209,14 +241,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv_i64 t = tcg_temp_new_i64(); \\\n gen_helper_##NAME(t, tcg_env, src1, src2); \\\n set_pair_regs(ctx, (a->rd) * 2, t); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_1(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -234,14 +266,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1); \\\n gen_helper_##HELPER(dest_0, tcg_env, src1_0, src2_0); \\\n gen_helper_##HELPER(dest_1, tcg_env, src1_1, src2_1); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_2(INSN, HELPER) \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -257,14 +289,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE); \\\n gen_helper_##HELPER(dest_0, tcg_env, src1_0, src2); \\\n gen_helper_##HELPER(dest_1, tcg_env, src1_1, src2); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_3(INSN, HELPER) \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -282,14 +314,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1); \\\n gen_helper_##HELPER(dest_0, tcg_env, src1_0, src2_0); \\\n gen_helper_##HELPER(dest_1, tcg_env, src1_1, src2_1); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_DW(INSN, HELPER) \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -307,14 +339,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1); \\\n gen_helper_##HELPER(dest_0, tcg_env, src1_0, imm_0); \\\n gen_helper_##HELPER(dest_1, tcg_env, src1_1, imm_1); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_DW_IMM(INSN, HELPER) \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -332,14 +364,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1); \\\n gen_helper_##HELPER##_32(dest_0, tcg_env, src1_0, imm_0); \\\n gen_helper_##HELPER##_32(dest_1, tcg_env, src1_1, imm_1); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(INSN, HELPER) \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -356,14 +388,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n gen_helper_##HELPER(dest_1, tcg_env, src1_1); \\\n gen_set_gpr(ctx, (a->rd) * 2, dest_0); \\\n gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_5(INSN, HELPER) \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -378,14 +410,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv_i64 t = tcg_temp_new_i64(); \\\n gen_helper_##NAME(t, tcg_env, src1, imm); \\\n set_pair_regs(ctx, (a->rd) * 2, t); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_IMM(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -403,14 +435,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1); \\\n gen_helper_##HELPER(dest_0, tcg_env, src1_0, imm_0); \\\n gen_helper_##HELPER(dest_1, tcg_env, src1_1, imm_1); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_IMM_2(INSN, HELPER) \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -430,14 +462,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n } \\\n gen_helper_##NAME(t, tcg_env, src1, src2, t); \\\n set_pair_regs(ctx, (a->rd) * 2, t); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_ACC_REG_PAIR_1(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -461,14 +493,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n src1_h = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE); \\\n } \\\n gen_helper_##NAME(dest, tcg_env, src1_l, src1_h, src2); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_PREDSUM(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -487,14 +519,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv shamt = tcg_constant_tl(a->imm); \\\n TCGv_i32 dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, s1, shamt); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_PN_OP_IMM(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -513,14 +545,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n TCGv_i32 rs2 = get_gpr(ctx, a->rs2, EXT_NONE); \\\n TCGv_i32 dest = dest_gpr(ctx, a->rd); \\\n gen_helper_##NAME(dest, tcg_env, s1, rs2); \\\n- return true; \\\n+ return true; \\\n }\n #else\n #define GEN_SIMD_TRANS_PN_OP_REG(NAME) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n { \\\n REQUIRE_32BIT(ctx); \\\n- return true; \\\n+ return true; \\\n }\n #endif\n \n@@ -907,6 +939,236 @@ GEN_SIMD_TRANS_ACC_64(pm4adda_h)\n GEN_SIMD_TRANS_ACC_64(pm4addasu_h)\n GEN_SIMD_TRANS_ACC_64(pm4addau_h)\n \n+/* Packed SIMD - Double-Width Operations (RV32 only, register pairs) */\n+GEN_SIMD_TRANS_REG_PAIR_1(pwadd_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwadda_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwaddu_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwaddau_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsub_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsuba_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsubu_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsubau_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslli_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsll_bs)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslai_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsla_bs)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pwadd_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwadda_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwaddu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwaddau_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsub_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsuba_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsubu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsubau_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslli_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsll_hs)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslai_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsla_hs)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(wadd)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wadda)\n+GEN_SIMD_TRANS_REG_PAIR_1(waddu)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(waddau)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsub)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wsuba)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsubu)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wsubau)\n+\n+GEN_SIMD_TRANS_REG_PAIR_IMM(wslli)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsll)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(wslai)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsla)\n+\n+GEN_SIMD_TRANS_REG_PAIR_2(padd_db, padd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(psub_db, psub_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(psadd_db, psadd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(psaddu_db, psaddu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssub_db, pssub_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssubu_db, pssubu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(paadd_db, paadd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(paaddu_db, paaddu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasub_db, pasub_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasubu_db, pasubu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabd_db, pabd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabdu_db, pabdu_b)\n+GEN_SIMD_TRANS_REG_PAIR_5(psabs_db, psabs_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(padd_dh, padd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psub_dh, psub_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psadd_dh, psadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psaddu_dh, psaddu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssub_dh, pssub_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssubu_dh, pssubu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(paadd_dh, paadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(paaddu_dh, paaddu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasub_dh, pasub_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasubu_dh, pasubu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psh1add_dh, psh1add_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssh1sadd_dh, pssh1sadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pas_dhx, pas_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(psa_dhx, psa_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(psas_dhx, psas_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssa_dhx, pssa_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(paas_dhx, paas_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasa_dhx, pasa_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabd_dh, pabd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabdu_dh, pabdu_h)\n+GEN_SIMD_TRANS_REG_PAIR_5(psabs_dh, psabs_h)\n+GEN_SIMD_TRANS_REG_PAIR_DW(psadd_dw, sadd)\n+GEN_SIMD_TRANS_REG_PAIR_DW(psaddu_dw, saddu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssub_dw, ssub)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssubu_dw, ssubu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(paadd_dw, aadd)\n+GEN_SIMD_TRANS_REG_PAIR_DW(paaddu_dw, aaddu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pasub_dw, asub)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pasubu_dw, asubu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssh1sadd_dw, ssh1sadd)\n+\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(pslli_db, pslli_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrli_db, psrli_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrai_db, psrai_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmin_db, pmin_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pminu_db, pminu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmax_db, pmax_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmaxu_db, pmaxu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmseq_db, pmseq_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmslt_db, pmslt_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmsltu_db, pmsltu_b)\n+GEN_SIMD_TRANS_REG_PAIR_5(psext_dh_b, psext_h_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psati_dh, psati_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(pusati_dh, pusati_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(pslli_dh, pslli_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrli_dh, psrli_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrai_dh, psrai_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psslai_dh, psslai_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrari_dh, psrari_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmin_dh, pmin_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pminu_dh, pminu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmax_dh, pmax_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmaxu_dh, pmaxu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmseq_dh, pmseq_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmslt_dh, pmslt_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmsltu_dh, pmsltu_h)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(psati_dw, sati)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(pusati_dw, usati)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM(psslai_dw, sslai)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(psrari_dw, srari)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pmseq_dw, mseq)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pmslt_dw, mslt)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pmsltu_dw, msltu)\n+\n+GEN_SIMD_TRANS_REG_PAIR_3(padd_dbs, padd_bs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psll_dbs, psll_bs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psra_dbs, psra_bs)\n+GEN_SIMD_TRANS_REG_PAIR_3(padd_dhs, padd_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psll_dhs, psll_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psrl_dhs, psrl_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psra_dhs, psra_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(pssha_dhs, pssha_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psshar_dhs, psshar_hs)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssha_dws, ssha)\n+GEN_SIMD_TRANS_REG_PAIR_DW(psshar_dws, sshar)\n+\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairo_db, ppairo_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairo_dh, ppairo_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppaire_db, ppaire_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppaireo_db, ppaireo_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppaireo_dh, ppaireo_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairoe_dh, ppairoe_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairoe_db, ppairoe_b)\n+\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsum_dbs)\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsumu_dbs)\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsum_dhs)\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsumu_dhs)\n+\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrli_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrai_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrari_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipi_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipri_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipiu_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipriu_b)\n+\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrli_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrai_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrari_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipi_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipri_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipiu_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipriu_h)\n+\n+GEN_SIMD_TRANS_PN_OP_IMM(nsrli)\n+GEN_SIMD_TRANS_PN_OP_IMM(nsrai)\n+GEN_SIMD_TRANS_PN_OP_IMM(nsrari)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipi)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipri)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipiu)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipriu)\n+\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrl_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsra_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrar_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclip_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipr_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipu_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipru_bs)\n+\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrl_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsra_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrar_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclip_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipr_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipu_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipru_hs)\n+\n+GEN_SIMD_TRANS_PN_OP_REG(nsrl)\n+GEN_SIMD_TRANS_PN_OP_REG(nsra)\n+GEN_SIMD_TRANS_PN_OP_REG(nsrar)\n+GEN_SIMD_TRANS_PN_OP_REG(nclip)\n+GEN_SIMD_TRANS_PN_OP_REG(nclipr)\n+GEN_SIMD_TRANS_PN_OP_REG(nclipu)\n+GEN_SIMD_TRANS_PN_OP_REG(nclipru)\n+\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pmqwacc_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pmqrwacc_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(mqwacc)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(mqrwacc)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmul_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulsu_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulu_b)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmul_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulsu_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwmacc_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwmaccsu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwmaccu_h)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(wmul)\n+GEN_SIMD_TRANS_REG_PAIR_1(wmulsu)\n+GEN_SIMD_TRANS_REG_PAIR_1(wmulu)\n+\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wmacc)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wmaccsu)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wmaccu)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2waddsu_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2waddu_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wadd_hx)\n+\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wadda_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2waddasu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2waddau_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wadda_hx)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wsub_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wsub_hx)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wsuba_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wsuba_hx)\n+\n static bool trans_pli_b(DisasContext *ctx, arg_pli_b * a)\n {\n REQUIRE_EXT(ctx, RVP);\n@@ -973,3 +1235,439 @@ static bool trans_plui_w(DisasContext *ctx, arg_plui_w * a)\n gen_set_gpri(ctx, a->rd, imm);\n return true;\n }\n+\n+static bool trans_pli_db(DisasContext *ctx, arg_pli_db * a)\n+{\n+ REQUIRE_EXT(ctx, RVP);\n+ int i = 1;\n+ target_long imm = a->imm;\n+ while (i < TARGET_LONG_SIZE) {\n+ imm = ((imm << 8) + a->imm);\n+ i++;\n+ }\n+ gen_set_gpri(ctx, (a->rd) * 2, imm);\n+ gen_set_gpri(ctx, (a->rd) * 2 + 1, imm);\n+ return true;\n+}\n+\n+static bool trans_pli_dh(DisasContext *ctx, arg_pli_dh * a)\n+{\n+ REQUIRE_EXT(ctx, RVP);\n+ int i = 1;\n+ target_long imm = a->imm;\n+ while (i < TARGET_LONG_SIZE / 2) {\n+ imm = (imm << 16) + (a->imm & 0xFFFF);\n+ i++;\n+ }\n+ gen_set_gpri(ctx, (a->rd) * 2, imm);\n+ gen_set_gpri(ctx, (a->rd) * 2 + 1, imm);\n+ return true;\n+}\n+\n+static bool trans_plui_dh(DisasContext *ctx, arg_plui_dh * a)\n+{\n+ REQUIRE_EXT(ctx, RVP);\n+ int i = 1;\n+ target_long imm = a->imm;\n+ while (i < TARGET_LONG_SIZE / 2) {\n+ imm = (imm << 16) + (a->imm & 0xFFFF);\n+ i++;\n+ }\n+ gen_set_gpri(ctx, (a->rd) * 2, imm);\n+ gen_set_gpri(ctx, (a->rd) * 2 + 1, imm);\n+ return true;\n+}\n+\n+static bool trans_padd_dw(DisasContext *ctx, arg_padd_dw * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+ TCGv src2_0 = get_gpr(ctx, (a->rs2) * 2, EXT_NONE);\n+ TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+ TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+ TCGv src2_1 = get_gpr(ctx, (a->rs2) * 2 + 1, EXT_NONE);\n+ TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+ tcg_gen_add_tl(dest_0, src1_0, src2_0);\n+ tcg_gen_add_tl(dest_1, src1_1, src2_1);\n+ gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+ gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+ return true;\n+}\n+\n+static bool trans_psub_dw(DisasContext *ctx, arg_psub_dw * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+ TCGv src2_0 = get_gpr(ctx, (a->rs2) * 2, EXT_NONE);\n+ TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+ TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+ TCGv src2_1 = get_gpr(ctx, (a->rs2) * 2 + 1, EXT_NONE);\n+ TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+ tcg_gen_sub_tl(dest_0, src1_0, src2_0);\n+ tcg_gen_sub_tl(dest_1, src1_1, src2_1);\n+ gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+ gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+ return true;\n+}\n+\n+static bool trans_psh1add_dw(DisasContext *ctx, arg_psh1add_dw * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+ TCGv src2_0 = get_gpr(ctx, (a->rs2) * 2, EXT_NONE);\n+ TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+ TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+ TCGv src2_1 = get_gpr(ctx, (a->rs2) * 2 + 1, EXT_NONE);\n+ TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+ gen_sh1add(dest_0, src1_0, src2_0);\n+ gen_sh1add(dest_1, src1_1, src2_1);\n+ gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+ gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+ return true;\n+}\n+\n+/* Verify rd is not zero register for wzip8p and wzip16p. */\n+#if defined(TARGET_RISCV32)\n+static bool trans_wzip8p(DisasContext *ctx, arg_wzip8p * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv_i32 src1 = get_gpr(ctx, a->rs1, EXT_NONE);\n+ TCGv_i32 src2 = get_gpr(ctx, a->rs2, EXT_NONE);\n+ TCGv_i64 t = tcg_temp_new_i64();\n+ if (a->rd == 0) {\n+ return true;\n+ } else {\n+ get_pair_regs(ctx, t, (a->rd) * 2);\n+ }\n+ gen_helper_wzip8p(t, tcg_env, src1, src2);\n+ set_pair_regs(ctx, (a->rd) * 2, t);\n+ return true;\n+}\n+#else\n+static bool trans_wzip8p(DisasContext *ctx, arg_wzip8p * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ return true;\n+}\n+#endif\n+\n+#if defined(TARGET_RISCV32)\n+static bool trans_wzip16p(DisasContext *ctx, arg_wzip16p * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv_i32 src1 = get_gpr(ctx, a->rs1, EXT_NONE);\n+ TCGv_i32 src2 = get_gpr(ctx, a->rs2, EXT_NONE);\n+ TCGv_i64 t = tcg_temp_new_i64();\n+ if (a->rd == 0) {\n+ return true;\n+ } else {\n+ get_pair_regs(ctx, t, (a->rd) * 2);\n+ }\n+ gen_helper_wzip16p(t, tcg_env, src1, src2);\n+ set_pair_regs(ctx, (a->rd) * 2, t);\n+ return true;\n+}\n+#else\n+static bool trans_wzip16p(DisasContext *ctx, arg_wzip16p * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ return true;\n+}\n+#endif\n+\n+static bool trans_addd_p(DisasContext *ctx, arg_addd_p * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv_i64 src1 = tcg_temp_new_i64();\n+ TCGv_i64 src2 = tcg_temp_new_i64();\n+ TCGv_i64 dest = tcg_temp_new_i64();\n+ get_pair_regs(ctx, src1, (a->rs1) * 2);\n+ get_pair_regs(ctx, src2, (a->rs2) * 2);\n+ get_pair_regs(ctx, dest, (a->rd) * 2);\n+ tcg_gen_add_i64(dest, src1, src2);\n+ set_pair_regs(ctx, (a->rd) * 2, dest);\n+\n+ return true;\n+}\n+\n+static bool trans_subd_p(DisasContext *ctx, arg_subd_p * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv_i64 src1 = tcg_temp_new_i64();\n+ TCGv_i64 src2 = tcg_temp_new_i64();\n+ TCGv_i64 dest = tcg_temp_new_i64();\n+ get_pair_regs(ctx, src1, (a->rs1) * 2);\n+ get_pair_regs(ctx, src2, (a->rs2) * 2);\n+ get_pair_regs(ctx, dest, (a->rd) * 2);\n+ tcg_gen_sub_i64(dest, src1, src2);\n+ set_pair_regs(ctx, (a->rd) * 2, dest);\n+\n+ return true;\n+}\n+\n+static bool trans_psext_dw_b(DisasContext *ctx, arg_psext_dw_b * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+ TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+ TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+ TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+\n+ tcg_gen_ext8s_tl(dest_0, src1_0);\n+ gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+\n+ tcg_gen_ext8s_tl(dest_1, src1_1);\n+ gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+\n+ return true;\n+}\n+\n+static bool trans_psext_dw_h(DisasContext *ctx, arg_psext_dw_h * a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+ TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+ TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+ TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+\n+ tcg_gen_ext16s_tl(dest_0, src1_0);\n+ gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+\n+ tcg_gen_ext16s_tl(dest_1, src1_1);\n+ gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+\n+ return true;\n+}\n+\n+static bool trans_pslli_dw(DisasContext *ctx, arg_pslli_dw *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ arg_shift a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.shamt = a->imm;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.shamt = a->imm;\n+\n+ gen_shift_imm_fn(ctx, &a0, EXT_NONE, tcg_gen_shli_tl, NULL);\n+ gen_shift_imm_fn(ctx, &a1, EXT_NONE, tcg_gen_shli_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_psrli_dw(DisasContext *ctx, arg_psrli_dw *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ arg_shift a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.shamt = a->imm;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.shamt = a->imm;\n+\n+ gen_shift_imm_fn_per_ol(ctx, &a0, EXT_NONE, tcg_gen_shri_tl,\n+ gen_srliw, NULL);\n+ gen_shift_imm_fn_per_ol(ctx, &a1, EXT_NONE, tcg_gen_shri_tl,\n+ gen_srliw, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_psrai_dw(DisasContext *ctx, arg_psrai_dw *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ arg_shift a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.shamt = a->imm;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.shamt = a->imm;\n+\n+ gen_shift_imm_fn_per_ol(ctx, &a0, EXT_NONE, tcg_gen_sari_tl,\n+ gen_sraiw, NULL);\n+ gen_shift_imm_fn_per_ol(ctx, &a1, EXT_NONE, tcg_gen_sari_tl,\n+ gen_sraiw, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_pmin_dw(DisasContext *ctx, arg_pmin_dw *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ REQUIRE_ZBB(ctx);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = (a->rs2) * 2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = (a->rs2) * 2 + 1;\n+\n+ gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_smin_tl, NULL);\n+ gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_smin_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_pminu_dw(DisasContext *ctx, arg_pminu_dw *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ REQUIRE_ZBB(ctx);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = (a->rs2) * 2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = (a->rs2) * 2 + 1;\n+\n+ gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_umin_tl, NULL);\n+ gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_umin_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_pmax_dw(DisasContext *ctx, arg_pmax_dw *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ REQUIRE_ZBB(ctx);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = (a->rs2) * 2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = (a->rs2) * 2 + 1;\n+\n+ gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_smax_tl, NULL);\n+ gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_smax_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_pmaxu_dw(DisasContext *ctx, arg_pmaxu_dw *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ REQUIRE_ZBB(ctx);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = (a->rs2) * 2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = (a->rs2) * 2 + 1;\n+\n+ gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_umax_tl, NULL);\n+ gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_umax_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_padd_dws(DisasContext *ctx, arg_padd_dws *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = a->rs2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = a->rs2;\n+\n+ gen_arith(ctx, &a0, EXT_NONE, tcg_gen_add_tl, NULL);\n+ gen_arith(ctx, &a1, EXT_NONE, tcg_gen_add_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_psll_dws(DisasContext *ctx, arg_psll_dws *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = a->rs2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = a->rs2;\n+\n+ gen_shift(ctx, &a0, EXT_NONE, tcg_gen_shl_tl, NULL);\n+ gen_shift(ctx, &a1, EXT_NONE, tcg_gen_shl_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_psrl_dws(DisasContext *ctx, arg_psrl_dws *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = a->rs2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = a->rs2;\n+\n+ gen_shift(ctx, &a0, EXT_ZERO, tcg_gen_shr_tl, NULL);\n+ gen_shift(ctx, &a1, EXT_ZERO, tcg_gen_shr_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_psra_dws(DisasContext *ctx, arg_psra_dws *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = a->rs2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = a->rs2;\n+\n+ gen_shift(ctx, &a0, EXT_SIGN, tcg_gen_sar_tl, NULL);\n+ gen_shift(ctx, &a1, EXT_SIGN, tcg_gen_sar_tl, NULL);\n+\n+ return true;\n+}\n+\n+static bool trans_ppaire_dh(DisasContext *ctx, arg_ppaire_dh *a)\n+{\n+ REQUIRE_32BIT(ctx);\n+ REQUIRE_EXT(ctx, RVP);\n+ REQUIRE_ZBKB(ctx);\n+ arg_r a0, a1;\n+ a0.rd = (a->rd) * 2;\n+ a0.rs1 = (a->rs1) * 2;\n+ a0.rs2 = (a->rs2) * 2;\n+ a1.rd = (a->rd) * 2 + 1;\n+ a1.rs1 = (a->rs1) * 2 + 1;\n+ a1.rs2 = (a->rs2) * 2 + 1;\n+\n+ gen_arith(ctx, &a0, EXT_NONE, gen_pack, NULL);\n+ gen_arith(ctx, &a1, EXT_NONE, gen_pack, NULL);\n+ return true;\n+}\ndiff --git a/target/riscv/psimd_helper.c b/target/riscv/psimd_helper.c\nindex 5eede48581..4c91800128 100644\n--- a/target/riscv/psimd_helper.c\n+++ b/target/riscv/psimd_helper.c\n@@ -7012,3 +7012,2071 @@ uint64_t HELPER(pm4addau_h)(CPURISCVState *env, uint64_t rs1,\n uint64_t prod3 = (uint64_t)s1_h3 * (uint64_t)s2_h3;\n return d + prod0 + prod1 + prod2 + prod3;\n }\n+\n+/* Double-Width Operations (RV32 only, register pairs) */\n+\n+/**\n+ * PWADD.B - Packed widening byte to halfword addition (RV32)\n+ * rd_pair = {rs1[31:24]+rs2[31:24], rs1[23:16]+rs2[23:16],\n+ * rs1[15:8]+rs2[15:8], rs1[7:0]+rs2[7:0]} (sign-extended)\n+ */\n+uint64_t HELPER(pwadd_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+ int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+ int16_t res = e1 + e2;\n+ rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWADDA.B - Packed widening byte to halfword addition with accumulate (RV32)\n+ * rd_pair += {rs1[i] + rs2[i]}\n+ */\n+uint64_t HELPER(pwadda_b)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+ int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+ int16_t acc = (int16_t)((rd >> (i * 16)) & 0xFFFF);\n+ int16_t res = acc + e1 + e2;\n+ result |= ((uint64_t)(uint16_t)res) << (i * 16);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWADDU.B - Packed widening byte to halfword unsigned addition (RV32)\n+ */\n+uint64_t HELPER(pwaddu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+ uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+ uint16_t res = e1 + e2;\n+ rd |= ((uint64_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWADDAU.B - Packed widening byte to halfword unsigned addition\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwaddau_b)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+ uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+ uint16_t acc = (uint16_t)((rd >> (i * 16)) & 0xFFFF);\n+ uint16_t res = acc + e1 + e2;\n+ result |= ((uint64_t)res) << (i * 16);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWSUB.B - Packed widening byte to halfword subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsub_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+ int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+ int16_t res = e1 - e2;\n+ rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSUBA.B - Packed widening byte to halfword subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsuba_b)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+ int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+ int16_t acc = (int16_t)((rd >> (i * 16)) & 0xFFFF);\n+ int16_t res = acc + (e1 - e2);\n+ result |= ((uint64_t)(uint16_t)res) << (i * 16);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWSUBU.B - Packed widening byte to halfword unsigned subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsubu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+ uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+ uint16_t res = e1 - e2;\n+ rd |= ((uint64_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSUBAU.B - Packed widening byte to halfword unsigned subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsubau_b)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+ uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+ uint16_t acc = (uint16_t)((rd >> (i * 16)) & 0xFFFF);\n+ uint16_t res = acc + (e1 - e2);\n+ result |= ((uint64_t)res) << (i * 16);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWSLLI.B - Packed widening shift left immediate (byte to halfword)\n+ */\n+uint64_t HELPER(pwslli_b)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = imm & 0x0F;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+ uint16_t res = e1 << shamt;\n+ rd |= ((uint64_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSLL.BS - Packed widening shift left from register (byte to halfword)\n+ */\n+uint64_t HELPER(pwsll_bs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = rs2 & 0x1F;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+ uint16_t res = e1 << shamt;\n+ rd |= ((uint64_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSLAI.B - Packed widening signed shift left immediate (byte to halfword)\n+ */\n+uint64_t HELPER(pwslai_b)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = imm & 0x0F;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+ int16_t res = e1 << shamt;\n+ rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSLA.BS - Packed widening signed shift left from register (byte to halfword)\n+ */\n+uint64_t HELPER(pwsla_bs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = rs2 & 0x1F;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+ int16_t res = e1 << shamt;\n+ rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWADD.H - Packed widening halfword to word addition (RV32)\n+ */\n+uint64_t HELPER(pwadd_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ int32_t res = e1 + e2;\n+ rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWADDA.H - Packed widening halfword to word addition with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwadda_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ int32_t acc = (int32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+ int32_t res = acc + e1 + e2;\n+ result |= ((uint64_t)(uint32_t)res) << (i * 32);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWADDU.H - Packed widening halfword to word unsigned addition (RV32)\n+ */\n+uint64_t HELPER(pwaddu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ uint32_t res = e1 + e2;\n+ rd |= ((uint64_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWADDAU.H - Packed widening halfword to word unsigned addition\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwaddau_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ uint32_t acc = (uint32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+ uint32_t res = acc + e1 + e2;\n+ result |= ((uint64_t)res) << (i * 32);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWSUB.H - Packed widening halfword to word subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsub_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ int32_t res = e1 - e2;\n+ rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSUBA.H - Packed widening halfword to word subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsuba_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ int32_t acc = (int32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+ int32_t res = acc + (e1 - e2);\n+ result |= ((uint64_t)(uint32_t)res) << (i * 32);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWSUBU.H - Packed widening halfword to word unsigned subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsubu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ uint32_t res = e1 - e2;\n+ rd |= ((uint64_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSUBAU.H - Packed widening halfword to word unsigned subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsubau_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t result = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+ uint32_t acc = (uint32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+ uint32_t res = acc + (e1 - e2);\n+ result |= ((uint64_t)res) << (i * 32);\n+ }\n+\n+ return result;\n+}\n+\n+/**\n+ * PWSLLI.H - Packed widening shift left immediate (halfword to word)\n+ */\n+uint64_t HELPER(pwslli_h)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = imm & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ uint32_t res = e1 << shamt;\n+ rd |= ((uint64_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSLL.HS - Packed widening shift left from register (halfword to word)\n+ */\n+uint64_t HELPER(pwsll_hs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = rs2 & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ uint32_t res = e1 << shamt;\n+ rd |= ((uint64_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSLAI.H - Packed widening signed shift left immediate (halfword to word)\n+ */\n+uint64_t HELPER(pwslai_h)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = imm & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ int32_t res = e1 << shamt;\n+ rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PWSLA.HS - Packed widening signed shift left from register (halfword to word)\n+ */\n+uint64_t HELPER(pwsla_hs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+ uint8_t shamt = rs2 & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+ int32_t res = e1 << shamt;\n+ rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * WADD - Widening signed addition (RV32)\n+ */\n+uint64_t HELPER(wadd)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int64_t a = (int32_t)rs1;\n+ int64_t b = (int32_t)rs2;\n+ return (uint64_t)(a + b);\n+}\n+\n+/**\n+ * WADDA - Widening signed addition with accumulate (RV32)\n+ */\n+uint64_t HELPER(wadda)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ int64_t a = (int32_t)rs1;\n+ int64_t b = (int32_t)rs2;\n+ int64_t acc = (int64_t)rd;\n+ return (uint64_t)(acc + a + b);\n+}\n+\n+/**\n+ * WADDU - Widening unsigned addition (RV32)\n+ */\n+uint64_t HELPER(waddu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t a = rs1;\n+ uint64_t b = rs2;\n+ return a + b;\n+}\n+\n+/**\n+ * WADDAU - Widening unsigned addition with accumulate (RV32)\n+ */\n+uint64_t HELPER(waddau)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t acc = rd;\n+ return acc + rs1 + rs2;\n+}\n+\n+/**\n+ * WSUB - Widening signed subtraction (RV32)\n+ */\n+uint64_t HELPER(wsub)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int64_t a = (int32_t)rs1;\n+ int64_t b = (int32_t)rs2;\n+ return (uint64_t)(a - b);\n+}\n+\n+/**\n+ * WSUBA - Widening signed subtraction with accumulate (RV32)\n+ */\n+uint64_t HELPER(wsuba)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ int64_t a = (int32_t)rs1;\n+ int64_t b = (int32_t)rs2;\n+ int64_t acc = (int64_t)rd;\n+ return (uint64_t)(acc + a - b);\n+}\n+\n+/**\n+ * WSUBU - Widening unsigned subtraction (RV32)\n+ */\n+uint64_t HELPER(wsubu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t a = rs1;\n+ uint64_t b = rs2;\n+ return a - b;\n+}\n+\n+/**\n+ * WSUBAU - Widening unsigned subtraction with accumulate (RV32)\n+ */\n+uint64_t HELPER(wsubau)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t rd)\n+{\n+ uint64_t acc = rd;\n+ return acc + rs1 - rs2;\n+}\n+\n+/**\n+ * WSLLI - Widening logical shift left immediate (RV32)\n+ */\n+uint64_t HELPER(wslli)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+ uint64_t a = rs1;\n+ uint8_t shamt = imm & 0x3F;\n+ return a << shamt;\n+}\n+\n+/**\n+ * WSLL - Widening logical shift left from register (RV32)\n+ */\n+uint64_t HELPER(wsll)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t a = rs1;\n+ uint8_t shamt = rs2 & 0x3F;\n+ return a << shamt;\n+}\n+\n+/**\n+ * WSLAI - Widening signed shift left immediate (RV32)\n+ */\n+uint64_t HELPER(wslai)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+ int64_t a = (int32_t)rs1;\n+ uint8_t shamt = imm & 0x3F;\n+ return (uint64_t)(a << shamt);\n+}\n+\n+/**\n+ * WSLA - Widening signed shift left from register (RV32)\n+ */\n+uint64_t HELPER(wsla)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int64_t a = (int32_t)rs1;\n+ uint8_t shamt = rs2 & 0x3F;\n+ return (uint64_t)(a << shamt);\n+}\n+\n+/**\n+ * WZIP8P - Double-width interleave bytes (RV32)\n+ */\n+uint64_t HELPER(wzip8p)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint64_t b1 = (uint64_t)EXTRACT8(rs1, i) << 16 * i;\n+ uint64_t b2 = (uint64_t)EXTRACT8(rs2, i) << (16 * i + 8);\n+ rd = rd | b2 | b1;\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * WZIP16P - Double-width interleave halfwords (RV32)\n+ */\n+uint64_t HELPER(wzip16p)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint64_t h1 = (uint64_t)EXTRACT16(rs1, i) << (32 * i);\n+ uint64_t h2 = (uint64_t)EXTRACT16(rs2, i) << (32 * i + 16);\n+ rd = rd | h2 | h1;\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PREDSUM.DBS - Double-width signed reduction sum of bytes (RV32)\n+ */\n+uint32_t HELPER(predsum_dbs)(CPURISCVState *env, uint32_t rs1_lo,\n+ uint32_t rs1_hi, uint32_t rs2)\n+{\n+ int64_t sum = (int32_t)rs2;\n+ int64_t s1 = ((int64_t)rs1_hi << 32) | rs1_lo;\n+\n+ for (int i = 0; i < 8; i++) {\n+ int8_t b = (int8_t)((s1 >> (i * 8)) & 0xFF);\n+ sum += b;\n+ }\n+\n+ return (uint32_t)sum;\n+}\n+\n+/**\n+ * PREDSUMU.DBS - Double-width unsigned reduction sum of bytes (RV32)\n+ */\n+uint32_t HELPER(predsumu_dbs)(CPURISCVState *env, uint32_t rs1_lo,\n+ uint32_t rs1_hi, uint32_t rs2)\n+{\n+ uint64_t sum = rs2;\n+ uint64_t s1 = ((uint64_t)rs1_hi << 32) | rs1_lo;\n+\n+ for (int i = 0; i < 8; i++) {\n+ uint8_t b = (uint8_t)((s1 >> (i * 8)) & 0xFF);\n+ sum += b;\n+ }\n+\n+ return (uint32_t)sum;\n+}\n+\n+/**\n+ * PREDSUM.DHS - Double-width signed reduction sum of halfwords (RV32)\n+ */\n+uint32_t HELPER(predsum_dhs)(CPURISCVState *env, uint32_t rs1_lo,\n+ uint32_t rs1_hi, uint32_t rs2)\n+{\n+ int64_t sum = (int32_t)rs2;\n+ int64_t s1 = ((int64_t)rs1_hi << 32) | rs1_lo;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int16_t h = (int16_t)((s1 >> (i * 16)) & 0xFFFF);\n+ sum += h;\n+ }\n+\n+ return (uint32_t)sum;\n+}\n+\n+/**\n+ * PREDSUMU.DHS - Double-width unsigned reduction sum of halfwords (RV32)\n+ */\n+uint32_t HELPER(predsumu_dhs)(CPURISCVState *env, uint32_t rs1_lo,\n+ uint32_t rs1_hi, uint32_t rs2)\n+{\n+ uint64_t sum = rs2;\n+ uint64_t s1 = ((uint64_t)rs1_hi << 32) | rs1_lo;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t h = (uint16_t)((s1 >> (i * 16)) & 0xFFFF);\n+ sum += h;\n+ }\n+\n+ return (uint32_t)sum;\n+}\n+\n+\n+/* Narrowing Operations (RV32 only, register pair sources) */\n+\n+/**\n+ * PNSRLI.B - Narrowing logical shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrli_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ uint8_t result = (s1_h >> (shamt & 0xF)) & 0xFF;\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNSRL.BS - Narrowing logical shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrl_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ uint32_t s1_h_z32 = (uint32_t)s1_h;\n+ uint8_t result = (s1_h_z32 >> (shamt & 0x1F)) & 0xFF;\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNSRAI.B - Narrowing arithmetic shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrai_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+ int32_t s1_h_s24 = (s1_h_s32 << 8) >> 8;\n+ uint8_t result = s1_h_s24 >> (shamt & 0xF) & 0xFF;\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNSRA.BS - Narrowing arithmetic shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsra_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+ s1_h_s64 = (s1_h_s64 << 24) >> 24;\n+ uint8_t result = s1_h_s64 >> (shamt & 0x1F) & 0xFF;\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNSRARI.B - Narrowing arithmetic shift right with rounding\n+ * immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrari_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+ int32_t s1_h_s24 = (s1_h_s32 << 8) >> 8;\n+ uint32_t shx_25bit = ((uint32_t)s1_h_s24 << 1);\n+ uint32_t shx = (shx_25bit >> (shamt & 0xF)) & 0x1FF;\n+ uint8_t result = ((shx + 1) >> 1) & 0xFF;\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNSRAR.BS - Narrowing arithmetic shift right with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrar_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+ int64_t s1_h_s40 = (s1_h_s64 << 24) >> 24;\n+ uint64_t shx_41bit = ((uint64_t)s1_h_s40 << 1);\n+ uint64_t shx = (shx_41bit >> (shamt & 0x1F)) & 0x1FF;\n+ uint8_t result = ((shx + 1) >> 1) & 0xFF;\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPI.B - Narrowing clip signed (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipi_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+ int16_t shx = (int16_t)(s1_h_s32 >> (shamt & 0xF));\n+ uint8_t result = 0;\n+\n+ if (shx < -128) {\n+ sat = 1;\n+ result = 0x80; /* -128 */\n+ } else if (shx > 127) {\n+ sat = 1;\n+ result = 0x7F; /* 127 */\n+ } else {\n+ result = (uint8_t)shx;\n+ }\n+ rd |= ((uint32_t)result << (i * 8));\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPRI.B - Narrowing clip signed with rounding\n+ * (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipri_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+ uint64_t shx_33bit = ((uint32_t)s1_h_s32 << 1);\n+ uint32_t shx = (shx_33bit >> (shamt & 0xF)) & 0x1FFFF;\n+ uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+ int16_t round_shx_s = (int16_t)round_shx;\n+ uint8_t result = 0;\n+\n+ if (round_shx_s < -128) {\n+ sat = 1;\n+ result = 0x80;\n+ } else if (round_shx_s > 127) {\n+ sat = 1;\n+ result = 0x7F;\n+ } else {\n+ result = (uint8_t)round_shx;\n+ }\n+\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPIU.B - Narrowing clip unsigned (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipiu_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ uint16_t shx = s1_h >> (shamt & 0xF);\n+ uint8_t result = 0;\n+\n+ if (shx > 0x00FF) {\n+ sat = 1;\n+ result = 0xFF;\n+ } else {\n+ result = (uint8_t)(shx & 0xFF);\n+ }\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPRIU.B - Narrowing clip unsigned with rounding\n+ * (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipriu_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ uint32_t shx_17bit = ((uint32_t)s1_h << 1);\n+ uint32_t shx = shx_17bit >> (shamt & 0xF);\n+ uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+ uint8_t result = 0;\n+\n+ if (round_shx > 0x00FF) {\n+ sat = 1;\n+ result = 0xFF;\n+ } else {\n+ result = (uint8_t)(round_shx & 0xFF);\n+ }\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIP.BS - Narrowing clip signed from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclip_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+ int64_t s1_h_s48 = (s1_h_s64 << 16) >> 16;\n+ int16_t shx = (int16_t)(s1_h_s48 >> (shamt & 0x1F));\n+ uint8_t result = 0;\n+\n+ if (shx < -128) {\n+ sat = 1;\n+ result = 0x80;\n+ } else if (shx > 127) {\n+ sat = 1;\n+ result = 0x7F;\n+ } else {\n+ result = (uint8_t)shx;\n+ }\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPR.BS - Narrowing clip signed with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclipr_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+ int64_t s1_h_s48 = (s1_h_s64 << 16) >> 16;\n+ uint64_t shx_49bit = ((uint64_t)s1_h_s48 << 1);\n+ uint32_t shx = (shx_49bit >> (shamt & 0x1F)) & 0x1FFFF;\n+ uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+ int16_t round_shx_s = (int16_t)round_shx;\n+ uint8_t result = 0;\n+\n+ if (round_shx_s < -128) {\n+ sat = 1;\n+ result = 0x80;\n+ } else if (round_shx_s > 127) {\n+ sat = 1;\n+ result = 0x7F;\n+ } else {\n+ result = (uint8_t)round_shx;\n+ }\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPU.BS - Narrowing clip unsigned from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclipu_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ uint32_t s1_h_z32 = (uint32_t)s1_h;\n+ uint16_t shx = (s1_h_z32 >> (shamt & 0x1F)) & 0xFFFF;\n+ uint8_t result = 0;\n+\n+ if (shx > 0x00FF) {\n+ sat = 1;\n+ result = 0xFF;\n+ } else {\n+ result = (uint8_t)(shx & 0xFF);\n+ }\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPRU.BS - Narrowing clip unsigned with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclipru_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+ uint32_t s1_h_z32 = (uint32_t)s1_h;\n+ uint64_t shx_33bit = ((uint64_t)s1_h_z32 << 1);\n+ uint32_t shx = (shx_33bit >> (shamt & 0x1F)) & 0x1FFFF;\n+ uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+ uint8_t result = 0;\n+\n+ if (round_shx > 0x00FF) {\n+ sat = 1;\n+ result = 0xFF;\n+ } else {\n+ result = (uint8_t)(round_shx & 0xFF);\n+ }\n+ rd |= ((uint32_t)result) << (i * 8);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNSRLI.H - Narrowing logical shift right immediate\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrli_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ uint32_t s1_low = (uint32_t)(s1 & 0xFFFFFFFF);\n+ uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+\n+ uint16_t rd_low = (s1_low >> (shamt & 0x1F)) & 0xFFFF;\n+ uint16_t rd_high = (s1_high >> (shamt & 0x1F)) & 0xFFFF;\n+\n+ rd = ((uint32_t)rd_high << 16) | rd_low;\n+ return rd;\n+}\n+\n+/**\n+ * PNSRAI.H - Narrowing arithmetic shift right immediate\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrai_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ uint32_t s1_low = (uint32_t)(s1 & 0xFFFFFFFF);\n+ int64_t s1_low_s64 = (int64_t)(int32_t)s1_low;\n+ int64_t s1_low_s48 = (s1_low_s64 << 16) >> 16;\n+\n+ uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+ int64_t s1_high_s64 = (int64_t)(int32_t)s1_high;\n+ int64_t s1_high_s48 = (s1_high_s64 << 16) >> 16;\n+\n+ uint16_t rd_low = (s1_low_s48 >> (shamt & 0x1F)) & 0xFFFF;\n+ uint16_t rd_high = (s1_high_s48 >> (shamt & 0x1F)) & 0xFFFF;\n+\n+ rd = ((uint32_t)rd_high << 16) | rd_low;\n+ return rd;\n+}\n+\n+/**\n+ * PNSRARI.H - Narrowing arithmetic shift right with rounding\n+ * immediate (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrari_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t s1_w = (s1 >> (i * 32)) & 0xFFFFFFFF;\n+ int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+ int64_t s1_w_s48 = (s1_w_s64 << 16) >> 16;\n+ uint64_t shx_49bit = ((uint64_t)s1_w_s48 << 1);\n+ uint32_t shx = (shx_49bit >> (shamt & 0x1F)) & 0x1FFFF;\n+ rd |= ((uint16_t)((shx + 1) >> 1)) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNSRL.HS - Narrowing logical shift right from register\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrl_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ uint32_t s1_low = (uint32_t)(s1 & 0xFFFFFFFF);\n+ uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+\n+ uint16_t rd_low = (s1_low >> (shamt & 0x1F)) & 0xFFFF;\n+ uint16_t rd_high = (s1_high >> (shamt & 0x1F)) & 0xFFFF;\n+\n+ rd = ((uint32_t)rd_high << 16) | rd_low;\n+ return rd;\n+}\n+\n+/**\n+ * PNSRA.HS - Narrowing arithmetic shift right from register\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsra_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ uint32_t s1_low = (uint32_t)(s1 & 0xFFFFFFFF);\n+ uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+\n+ uint16_t rd_low = (s1_low >> (shamt & 0x1F)) & 0xFFFF;\n+ uint16_t rd_high = (s1_high >> (shamt & 0x1F)) & 0xFFFF;\n+\n+ rd = ((uint32_t)rd_high << 16) | rd_low;\n+ return rd;\n+}\n+\n+/**\n+ * PNSRAR.HS - Narrowing arithmetic shift right with rounding\n+ * from register (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrar_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t s1_w = (s1 >> (i * 32)) & 0xFFFFFFFF;\n+ int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+ int64_t s1_w_s48 = (s1_w_s64 << 16) >> 16;\n+ uint64_t shx_49bit = ((uint64_t)s1_w_s48 << 1);\n+ uint32_t shx = (shx_49bit >> (shamt & 0x1F)) & 0x1FFFF;\n+ rd |= ((uint16_t)((shx + 1) >> 1)) << (i * 16);\n+ }\n+\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIP.HS - Narrowing signed clip from register shift (word to halfword)\n+ * For each word: arithmetic right shift, clip to signed 16-bit\n+ * shx = (int32_t)rs1[i] >> shamt\n+ * result = sat16(shx)\n+ */\n+uint32_t HELPER(pnclip_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+ uint8_t shift = shamt & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t s1_w = EXTRACT32(s1, i);\n+ int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+ int32_t shx = (int32_t)(s1_w_s64 >> shift);\n+ uint16_t result;\n+\n+ if (shx < -32768) {\n+ sat = 1;\n+ result = 0x8000;\n+ } else if (shx > 32767) {\n+ sat = 1;\n+ result = 0x7FFF;\n+ } else {\n+ result = (uint16_t)shx;\n+ }\n+\n+ rd = INSERT16(rd, result, i);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPR.HS - Narrowing signed clip with rounding\n+ * from register (word to halfword)\n+ * For each word: ((int32_t)rs1[i] << 1) >> shamt, round, clip to signed 16-bit\n+ * shx_65bit = ((int64_t)rs1[i] << 1)\n+ * shx = (shx_65bit >> shamt) & mask\n+ * round = (shx + 1) >> 1\n+ * result = sat16(round)\n+ */\n+uint32_t HELPER(pnclipr_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+ uint8_t shift = shamt & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t s1_w = EXTRACT32(s1, i);\n+ int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+ __uint128_t shx_65bit = (__uint128_t)s1_w_s64 << 1;\n+ uint64_t shx = (uint64_t)(shx_65bit >> shift) & 0x1FFFFFFFF;\n+ int32_t round_shx = (int32_t)((shx + 1) >> 1);\n+ uint16_t result;\n+\n+ if (round_shx < -32768) {\n+ sat = 1;\n+ result = 0x8000;\n+ } else if (round_shx > 32767) {\n+ sat = 1;\n+ result = 0x7FFF;\n+ } else {\n+ result = (uint16_t)round_shx;\n+ }\n+\n+ rd = INSERT16(rd, result, i);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPI.H - Narrowing signed clip from immediate shift (word to halfword)\n+ * For each word: rs1[i] >> imm, clip to signed 16-bit\n+ */\n+uint32_t HELPER(pnclipi_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ return HELPER(pnclip_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * PNCLIPRI.H - Narrowing signed clip with rounding\n+ * from immediate shift (word to halfword)\n+ * For each word: (rs1[i] << 1) >> imm, round, clip to signed 16-bit\n+ */\n+uint32_t HELPER(pnclipri_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ return HELPER(pnclipr_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * PNCLIPU.HS - Narrowing unsigned clip from register shift (word to halfword)\n+ * For each word: shift right, clip to unsigned 16-bit\n+ * shx = rs1[i] >> shamt\n+ * result = (shx > 65535) ? 0xFFFF : shx\n+ */\n+uint32_t HELPER(pnclipu_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+ uint8_t shift = shamt & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t s1_w = EXTRACT32(s1, i);\n+ uint32_t shx = s1_w >> shift;\n+ uint16_t result;\n+\n+ if (shx > 65535) {\n+ sat = 1;\n+ result = 0xFFFF;\n+ } else {\n+ result = (uint16_t)shx;\n+ }\n+\n+ rd = INSERT16(rd, result, i);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPRU.HS - Narrowing unsigned clip with rounding\n+ * from register (word to halfword)\n+ * For each word: (rs1[i] << 1) >> shamt, round, clip to unsigned 16-bit\n+ * shx = ((rs1[i] << 1) >> shamt)\n+ * round = (shx + 1) >> 1\n+ * result = (round > 65535) ? 0xFFFF : round\n+ */\n+uint32_t HELPER(pnclipru_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint32_t rd = 0;\n+ int sat = 0;\n+ uint8_t shift = shamt & 0x1F;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint32_t s1_w = EXTRACT32(s1, i);\n+ uint64_t shx_33bit = (uint64_t)s1_w << 1;\n+ uint64_t shx = shx_33bit >> shift;\n+ uint32_t round_shx = (uint32_t)((shx + 1) >> 1);\n+ uint16_t result;\n+\n+ if (round_shx > 65535) {\n+ sat = 1;\n+ result = 0xFFFF;\n+ } else {\n+ result = (uint16_t)round_shx;\n+ }\n+\n+ rd = INSERT16(rd, result, i);\n+ }\n+\n+ if (sat) {\n+ env->vxsat = 1;\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PNCLIPIU.H - Narrowing unsigned clip from immediate shift (word to halfword)\n+ * For each word: rs1[i] >> imm, clip to unsigned 16-bit\n+ */\n+uint32_t HELPER(pnclipiu_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ return HELPER(pnclipu_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * PNCLIPRIU.H - Narrowing unsigned clip with rounding\n+ * from immediate shift (word to halfword)\n+ * For each word: (rs1[i] << 1) >> imm, round, clip to unsigned 16-bit\n+ */\n+uint32_t HELPER(pnclipriu_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ return HELPER(pnclipru_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * NSRLI - Narrowing logical shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrli)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ return (s1 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRAI - Narrowing arithmetic shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrai)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+ return (uint32_t)(s1_s96 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRARI - Narrowing arithmetic shift right with rounding\n+ * immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrari)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+ __uint128_t shx_97bit = ((__uint128_t)s1_s96 << 1);\n+ uint64_t shx = (uint64_t)(shx_97bit >> (shamt & 0x3F)) & 0x1FFFFFFFF;\n+ return (uint32_t)((shx + 1) >> 1);\n+}\n+\n+/**\n+ * NSRL - Narrowing logical shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrl)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ return (s1 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRA - Narrowing arithmetic shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsra)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+ return (uint32_t)(s1_s96 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRAR - Narrowing arithmetic shift right with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrar)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+ __uint128_t shx_97bit = ((__uint128_t)s1_s96 << 1);\n+ uint64_t shx = (uint64_t)(shx_97bit >> (shamt & 0x3F)) & 0x1FFFFFFFF;\n+ return (uint32_t)((shx + 1) >> 1);\n+}\n+\n+/**\n+ * NCLIPI - Narrowing clip signed with immediate shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipi)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ int64_t shx = (int64_t)(s1_s128 >> (shamt & 0x3F));\n+\n+ if (shx < -2147483648LL) {\n+ env->vxsat = 1;\n+ return 0x80000000U;\n+ } else if (shx > 2147483647LL) {\n+ env->vxsat = 1;\n+ return 0x7FFFFFFFU;\n+ } else {\n+ return (uint32_t)(shx & 0xFFFFFFFF);\n+ }\n+}\n+\n+/**\n+ * NCLIPRI - Narrowing clip signed with rounding and immediate\n+ * shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipri)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ typedef struct {\n+ __uint128_t low;\n+ uint8_t high;\n+ } Uint129;\n+\n+ Uint129 left_shift_1(__int128_t s1_s128)\n+ {\n+ Uint129 result;\n+ __uint128_t us1 = (__uint128_t)s1_s128;\n+ result.low = us1 << 1;\n+ result.high = (us1 >> 127) & 0x1;\n+ return result;\n+ }\n+\n+ Uint129 right_shift(Uint129 val, uint32_t smt)\n+ {\n+ Uint129 result;\n+ if (smt == 0) {\n+ return val;\n+ } else if (smt >= 129) {\n+ result.low = 0;\n+ result.high = 0;\n+ } else if (smt == 128) {\n+ result.low = val.high;\n+ result.high = 0;\n+ } else {\n+ result.low = (val.low >> smt) |\n+ ((__uint128_t)val.high << (128 - smt));\n+ result.high = (val.high >> smt);\n+ }\n+ return result;\n+ }\n+\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ Uint129 shx_129bit = left_shift_1(s1_s128);\n+ Uint129 shx = right_shift(shx_129bit, shamt & 0x3F);\n+ int64_t round_shx = (int64_t)((shx.low + 1) >> 1);\n+\n+ if (round_shx < -2147483648LL) {\n+ env->vxsat = 1;\n+ return 0x80000000U;\n+ } else if (round_shx > 2147483647LL) {\n+ env->vxsat = 1;\n+ return 0x7FFFFFFFU;\n+ } else {\n+ return (uint32_t)round_shx;\n+ }\n+}\n+\n+/**\n+ * NCLIPIU - Narrowing clip unsigned with immediate shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipiu)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint64_t shx = s1 >> (shamt & 0x3F);\n+\n+ if (shx > 4294967295ULL) {\n+ env->vxsat = 1;\n+ return 0xFFFFFFFFU;\n+ } else {\n+ return (uint32_t)(shx & 0xFFFFFFFF);\n+ }\n+}\n+\n+/**\n+ * NCLIPRIU - Narrowing clip unsigned with rounding and immediate\n+ * shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipriu)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __uint128_t shx_65bit = (s1 << 1);\n+ __uint128_t shx = shx_65bit >> (shamt & 0x3F);\n+ uint64_t round_shx = (shx + 1) >> 1;\n+\n+ if (round_shx > 4294967295ULL) {\n+ env->vxsat = 1;\n+ return 0xFFFFFFFFU;\n+ } else {\n+ return (uint32_t)(round_shx & 0xFFFFFFFF);\n+ }\n+}\n+\n+/**\n+ * NCLIP - Narrowing clip signed from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclip)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ int64_t shx = (int64_t)(s1_s128 >> (shamt & 0x3F));\n+\n+ if (shx < -2147483648LL) {\n+ env->vxsat = 1;\n+ return 0x80000000U;\n+ } else if (shx > 2147483647LL) {\n+ env->vxsat = 1;\n+ return 0x7FFFFFFFU;\n+ } else {\n+ return (uint32_t)(shx & 0xFFFFFFFF);\n+ }\n+}\n+\n+/**\n+ * NCLIPR - Narrowing clip signed with rounding from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipr)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ typedef struct {\n+ __uint128_t low;\n+ uint8_t high;\n+ } Uint129;\n+\n+ Uint129 left_shift_1(__int128_t s1_s128)\n+ {\n+ Uint129 result;\n+ __uint128_t us1 = (__uint128_t)s1_s128;\n+ result.low = us1 << 1;\n+ result.high = (us1 >> 127) & 0x1;\n+ return result;\n+ }\n+\n+ Uint129 right_shift(Uint129 val, uint32_t smt)\n+ {\n+ Uint129 result;\n+ if (smt == 0) {\n+ return val;\n+ } else if (smt >= 129) {\n+ result.low = 0;\n+ result.high = 0;\n+ } else if (smt == 128) {\n+ result.low = val.high;\n+ result.high = 0;\n+ } else {\n+ result.low = (val.low >> smt) |\n+ ((__uint128_t)val.high << (128 - smt));\n+ result.high = (val.high >> smt);\n+ }\n+ return result;\n+ }\n+\n+ __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+ Uint129 shx_129bit = left_shift_1(s1_s128);\n+ Uint129 shx = right_shift(shx_129bit, shamt & 0x3F);\n+ int64_t round_shx = (int64_t)((shx.low + 1) >> 1);\n+\n+ if (round_shx < -2147483648LL) {\n+ env->vxsat = 1;\n+ return 0x80000000U;\n+ } else if (round_shx > 2147483647LL) {\n+ env->vxsat = 1;\n+ return 0x7FFFFFFFU;\n+ } else {\n+ return (uint32_t)round_shx;\n+ }\n+}\n+\n+/**\n+ * NCLIPU - Narrowing clip unsigned from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipu)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ uint64_t shx = s1 >> (shamt & 0x3F);\n+\n+ if (shx > 4294967295ULL) {\n+ env->vxsat = 1;\n+ return 0xFFFFFFFFU;\n+ } else {\n+ return (uint32_t)(shx & 0xFFFFFFFF);\n+ }\n+}\n+\n+/**\n+ * NCLIPRU - Narrowing clip unsigned with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipru)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+ __uint128_t shx_65bit = (s1 << 1);\n+ __uint128_t shx = shx_65bit >> (shamt & 0x3F);\n+ uint64_t round_shx = (shx + 1) >> 1;\n+\n+ if (round_shx > 4294967295ULL) {\n+ env->vxsat = 1;\n+ return 0xFFFFFFFFU;\n+ } else {\n+ return (uint32_t)(round_shx & 0xFFFFFFFF);\n+ }\n+}\n+\n+/* Multiplication with Even-Odd Register Pairs as Destination (RV32 only) */\n+\n+/**\n+ * PMQWACC.H - Packed Q-format halfword to word multiply accumulate\n+ */\n+uint64_t HELPER(pmqwacc_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int16_t s1_h = (int16_t)EXTRACT16(rs1, i * 2);\n+ int16_t s2_h = (int16_t)EXTRACT16(rs2, i * 2);\n+ int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+ int64_t prod = (int64_t)s1_h * (int64_t)s2_h;\n+ uint32_t res = (uint32_t)(d_w + (int32_t)(prod >> 15));\n+ rd = INSERT32(rd, res, i);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PMQRWACC.H - Packed Q-format halfword to word multiply\n+ * accumulate with rounding\n+ */\n+uint64_t HELPER(pmqrwacc_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int16_t s1_h = (int16_t)EXTRACT16(rs1, i * 2);\n+ int16_t s2_h = (int16_t)EXTRACT16(rs2, i * 2);\n+ int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+ int64_t prod = (int64_t)s1_h * (int64_t)s2_h + (1LL << 14);\n+ uint32_t res = (uint32_t)(d_w + (int32_t)(prod >> 15));\n+ rd = INSERT32(rd, res, i);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMUL.B - Widening byte to halfword multiplication\n+ */\n+uint64_t HELPER(pwmul_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int8_t s1_b = (int8_t)EXTRACT8(rs1, i);\n+ int8_t s2_b = (int8_t)EXTRACT8(rs2, i);\n+ int16_t prod = (int16_t)s1_b * (int16_t)s2_b;\n+ rd |= ((uint64_t)(uint16_t)prod) << (i * 16);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMULSU.B - Widening signed x unsigned byte to halfword multiplication\n+ */\n+uint64_t HELPER(pwmulsu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ int8_t s1_b = (int8_t)EXTRACT8(rs1, i);\n+ uint8_t s2_b = EXTRACT8(rs2, i);\n+ int16_t prod = (int16_t)s1_b * (uint16_t)s2_b;\n+ rd |= ((uint64_t)(uint16_t)prod) << (i * 16);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMULU.B - Widening unsigned byte to halfword multiplication\n+ */\n+uint64_t HELPER(pwmulu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 4; i++) {\n+ uint8_t s1_b = EXTRACT8(rs1, i);\n+ uint8_t s2_b = EXTRACT8(rs2, i);\n+ uint16_t prod = (uint16_t)s1_b * (uint16_t)s2_b;\n+ rd |= ((uint64_t)prod) << (i * 16);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMUL.H - Widening halfword to word multiplication\n+ */\n+uint64_t HELPER(pwmul_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+ int16_t s2_h = (int16_t)EXTRACT16(rs2, i);\n+ int32_t prod = (int32_t)s1_h * (int32_t)s2_h;\n+ rd |= ((uint64_t)(uint32_t)prod) << (i * 32);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMULSU.H - Widening signed x unsigned halfword to word multiplication\n+ */\n+uint64_t HELPER(pwmulsu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+ uint16_t s2_h = EXTRACT16(rs2, i);\n+ int32_t prod = (int32_t)s1_h * (uint32_t)s2_h;\n+ rd |= ((uint64_t)(uint32_t)prod) << (i * 32);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMULU.H - Widening unsigned halfword to word multiplication\n+ */\n+uint64_t HELPER(pwmulu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint16_t s1_h = EXTRACT16(rs1, i);\n+ uint16_t s2_h = EXTRACT16(rs2, i);\n+ uint32_t prod = (uint32_t)s1_h * (uint32_t)s2_h;\n+ rd |= ((uint64_t)prod) << (i * 32);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMACC.H - Widening multiply accumulate (halfword to word)\n+ */\n+uint64_t HELPER(pwmacc_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+ int16_t s2_h = (int16_t)EXTRACT16(rs2, i);\n+ int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+ int32_t prod = (int32_t)s1_h * (int32_t)s2_h;\n+ uint32_t res = (uint32_t)(d_w + prod);\n+ rd |= ((uint64_t)res) << (i * 32);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMACCSU.H - Widening signed x unsigned multiply\n+ * accumulate (halfword to word)\n+ */\n+uint64_t HELPER(pwmaccsu_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+ uint16_t s2_h = EXTRACT16(rs2, i);\n+ int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+ int32_t prod = (int32_t)s1_h * (uint32_t)s2_h;\n+ uint32_t res = (uint32_t)(d_w + prod);\n+ rd |= ((uint64_t)res) << (i * 32);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * PWMACCU.H - Widening unsigned multiply accumulate (halfword to word)\n+ */\n+uint64_t HELPER(pwmaccu_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ uint64_t rd = 0;\n+\n+ for (int i = 0; i < 2; i++) {\n+ uint16_t s1_h = EXTRACT16(rs1, i);\n+ uint16_t s2_h = EXTRACT16(rs2, i);\n+ uint32_t d_w = EXTRACT32(dest, i);\n+ uint32_t prod = (uint32_t)s1_h * (uint32_t)s2_h;\n+ uint32_t res = d_w + prod;\n+ rd |= ((uint64_t)res) << (i * 32);\n+ }\n+ return rd;\n+}\n+\n+/**\n+ * MQWACC - Q-format word multiply accumulate\n+ */\n+uint64_t HELPER(mqwacc)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ int64_t s1 = (int64_t)(int32_t)rs1;\n+ int64_t s2 = (int64_t)(int32_t)rs2;\n+ int64_t d = (int64_t)dest;\n+ __int128_t prod = (__int128_t)s1 * (__int128_t)s2;\n+ return (uint64_t)(d + (int64_t)(prod >> 31));\n+}\n+\n+/**\n+ * MQRWACC - Q-format word multiply accumulate with rounding\n+ */\n+uint64_t HELPER(mqrwacc)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ int64_t s1 = (int64_t)(int32_t)rs1;\n+ int64_t s2 = (int64_t)(int32_t)rs2;\n+ int64_t d = (int64_t)dest;\n+ __int128_t prod = (__int128_t)s1 * (__int128_t)s2 + (1LL << 30);\n+ return (uint64_t)(d + (int64_t)(prod >> 31));\n+}\n+\n+/**\n+ * WMUL - Widening signed multiplication (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmul)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ return (uint64_t)((int64_t)(int32_t)rs1 * (int64_t)(int32_t)rs2);\n+}\n+\n+/**\n+ * WMULSU - Widening signed x unsigned multiplication (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmulsu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ return (uint64_t)((int64_t)(int32_t)rs1 * (uint64_t)rs2);\n+}\n+\n+/**\n+ * WMULU - Widening unsigned multiplication (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmulu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ return (uint64_t)rs1 * (uint64_t)rs2;\n+}\n+\n+/**\n+ * WMACC - Widening multiply accumulate signed (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmacc)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ return (uint64_t)((int64_t)(int32_t)rs1 *\n+ (int64_t)(int32_t)rs2 + (int64_t)dest);\n+}\n+\n+/**\n+ * WMACCSU - Widening multiply accumulate signed x unsigned\n+ * (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmaccsu)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ return (uint64_t)((int64_t)(int32_t)rs1 * (uint64_t)rs2 + (int64_t)dest);\n+}\n+\n+/**\n+ * WMACCU - Widening multiply accumulate unsigned (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmaccu)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ return (uint64_t)rs1 * (uint64_t)rs2 + (uint64_t)dest;\n+}\n+\n+/**\n+ * PM2WADD.H - Add two widening products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadd_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+ int64_t prod0 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+ int64_t prod1 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+ return (uint64_t)(prod0 + prod1);\n+}\n+\n+/**\n+ * PM2WADDSU.H - Add two widening products\n+ * (signed x unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddsu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ uint16_t s2_h0 = EXTRACT16(rs2, 0);\n+ uint16_t s2_h1 = EXTRACT16(rs2, 1);\n+ int64_t prod0 = (int64_t)s1_h0 * (uint64_t)s2_h0;\n+ int64_t prod1 = (int64_t)s1_h1 * (uint64_t)s2_h1;\n+ return (uint64_t)(prod0 + prod1);\n+}\n+\n+/**\n+ * PM2WADDU.H - Add two widening products (unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ uint16_t s1_h0 = EXTRACT16(rs1, 0);\n+ uint16_t s1_h1 = EXTRACT16(rs1, 1);\n+ uint16_t s2_h0 = EXTRACT16(rs2, 0);\n+ uint16_t s2_h1 = EXTRACT16(rs2, 1);\n+ uint64_t prod0 = (uint64_t)s1_h0 * (uint64_t)s2_h0;\n+ uint64_t prod1 = (uint64_t)s1_h1 * (uint64_t)s2_h1;\n+ return prod0 + prod1;\n+}\n+\n+/**\n+ * PM2WADDA.H - Add two widening products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadda_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ int16_t s1_h0 = EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = EXTRACT16(rs2, 1);\n+ int64_t d_h = (int64_t)dest;\n+ int64_t mul_00 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+ int64_t mul_11 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+ return (uint64_t)(d_h + mul_00 + mul_11);\n+}\n+\n+/**\n+ * PM2WADDASU.H - Add two widening products with accumulate\n+ * (signed x unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddasu_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ uint16_t s2_h0 = (uint16_t)EXTRACT16(rs2, 0);\n+ uint16_t s2_h1 = (uint16_t)EXTRACT16(rs2, 1);\n+ int64_t d_h = (int64_t)dest;\n+ int64_t mul_00 = (int64_t)s1_h0 * (uint64_t)s2_h0;\n+ int64_t mul_11 = (int64_t)s1_h1 * (uint64_t)s2_h1;\n+ return (uint64_t)(d_h + mul_00 + mul_11);\n+}\n+\n+/**\n+ * PM2WADDAU.H - Add two widening products with accumulate\n+ * (unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddau_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ uint16_t s1_h0 = (uint16_t)EXTRACT16(rs1, 0);\n+ uint16_t s1_h1 = (uint16_t)EXTRACT16(rs1, 1);\n+ uint16_t s2_h0 = (uint16_t)EXTRACT16(rs2, 0);\n+ uint16_t s2_h1 = (uint16_t)EXTRACT16(rs2, 1);\n+ uint64_t d_h = (uint64_t)dest;\n+ uint64_t mul_00 = (uint64_t)s1_h0 * (uint64_t)s2_h0;\n+ uint64_t mul_11 = (uint64_t)s1_h1 * (uint64_t)s2_h1;\n+ return (uint64_t)(d_h + mul_00 + mul_11);\n+}\n+\n+/**\n+ * PM2WADD.HX - Add two widening cross products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadd_hx)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+ int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+ int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+ return (uint64_t)(prod01 + prod10);\n+}\n+\n+/**\n+ * PM2WADDA.HX - Add two widening cross products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadda_hx)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+ int64_t d = (int64_t)dest;\n+ int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+ int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+ return (uint64_t)(d + prod01 + prod10);\n+}\n+\n+/**\n+ * PM2WSUB.H - Subtract two widening products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsub_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+ int64_t prod0 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+ int64_t prod1 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+ return (uint64_t)(prod0 - prod1);\n+}\n+\n+/**\n+ * PM2WSUB.HX - Subtract two widening cross products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsub_hx)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+ int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+ int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+ return (uint64_t)(prod01 - prod10);\n+}\n+\n+/**\n+ * PM2WSUBA.H - Subtract two widening products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsuba_h)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+ int64_t d = (int64_t)dest;\n+ int64_t prod0 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+ int64_t prod1 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+ return (uint64_t)(d + prod0 - prod1);\n+}\n+\n+/**\n+ * PM2WSUBA.HX - Subtract two widening cross products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsuba_hx)(CPURISCVState *env, uint32_t rs1,\n+ uint32_t rs2, uint64_t dest)\n+{\n+ int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+ int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+ int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+ int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+ int64_t d = (int64_t)dest;\n+ int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+ int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+ return (uint64_t)(d + prod01 - prod10);\n+}\n", "prefixes": [ "13/14" ] }