Patch Detail

GET /api/1.2/patches/2224365/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 2224365,
    "url": "http://patchwork.ozlabs.org/api/1.2/patches/2224365/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260417104652.17857-14-xiaoou@iscas.ac.cn/",
    "project": {
        "id": 14,
        "url": "http://patchwork.ozlabs.org/api/1.2/projects/14/?format=api",
        "name": "QEMU Development",
        "link_name": "qemu-devel",
        "list_id": "qemu-devel.nongnu.org",
        "list_email": "qemu-devel@nongnu.org",
        "web_url": "",
        "scm_url": "",
        "webscm_url": "",
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<20260417104652.17857-14-xiaoou@iscas.ac.cn>",
    "list_archive_url": null,
    "date": "2026-04-17T10:46:50",
    "name": "[13/14] target/riscv: rvp: add rv32-only register-pair instructions",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "9d88b6df697524296578eedb86eee84a916e2924",
    "submitter": {
        "id": 89843,
        "url": "http://patchwork.ozlabs.org/api/1.2/people/89843/?format=api",
        "name": "Molly Chen",
        "email": "xiaoou@iscas.ac.cn"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260417104652.17857-14-xiaoou@iscas.ac.cn/mbox/",
    "series": [
        {
            "id": 500307,
            "url": "http://patchwork.ozlabs.org/api/1.2/series/500307/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=500307",
            "date": "2026-04-17T10:46:37",
            "name": "target/riscv: add support for RISC-V P extension (v0.20 draft)",
            "version": 1,
            "mbox": "http://patchwork.ozlabs.org/series/500307/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/2224365/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/2224365/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>",
        "X-Original-To": "incoming@patchwork.ozlabs.org",
        "Delivered-To": "patchwork-incoming@legolas.ozlabs.org",
        "Authentication-Results": "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists1p.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=patchwork.ozlabs.org)",
        "Received": [
            "from lists1p.gnu.org (lists1p.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fxs7b16P7z1yCv\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 17 Apr 2026 20:48:59 +1000 (AEST)",
            "from localhost ([::1] helo=lists1p.gnu.org)\n\tby lists1p.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces@nongnu.org>)\n\tid 1wDgjs-0001Xw-9Y; Fri, 17 Apr 2026 06:47:56 -0400",
            "from eggs.gnu.org ([2001:470:142:3::10])\n by lists1p.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <xiaoou@iscas.ac.cn>)\n id 1wDgjp-0001Uw-9F; Fri, 17 Apr 2026 06:47:53 -0400",
            "from smtp21.cstnet.cn ([159.226.251.21] helo=cstnet.cn)\n by eggs.gnu.org with esmtps (TLS1.2:DHE_RSA_AES_256_CBC_SHA1:256)\n (Exim 4.90_1) (envelope-from <xiaoou@iscas.ac.cn>)\n id 1wDgjj-00083b-QA; Fri, 17 Apr 2026 06:47:52 -0400",
            "from Huawei.localdomain (unknown [36.110.52.2])\n by APP-01 (Coremail) with SMTP id qwCowAB3H2ulD+JpLDmSDQ--.804S15;\n Fri, 17 Apr 2026 18:47:23 +0800 (CST)"
        ],
        "From": "Molly Chen <xiaoou@iscas.ac.cn>",
        "To": "palmer@dabbelt.com, alistair.francis@wdc.com, liwei1518@gmail.com,\n daniel.barboza@oss.qualcomm.com, zhiwei_liu@linux.alibaba.com,\n chao.liu.zevorn@gmail.com",
        "Cc": "xiaoou@iscas.ac.cn,\n\tqemu-riscv@nongnu.org,\n\tqemu-devel@nongnu.org",
        "Subject": "[PATCH 13/14] target/riscv: rvp: add rv32-only register-pair\n instructions",
        "Date": "Fri, 17 Apr 2026 18:46:50 +0800",
        "Message-Id": "<20260417104652.17857-14-xiaoou@iscas.ac.cn>",
        "X-Mailer": "git-send-email 2.34.1",
        "In-Reply-To": "<20260417104652.17857-1-xiaoou@iscas.ac.cn>",
        "References": "<20260417104652.17857-1-xiaoou@iscas.ac.cn>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-CM-TRANSID": "qwCowAB3H2ulD+JpLDmSDQ--.804S15",
        "X-Coremail-Antispam": "1UD129KBjvAXoWDKw1rGFWfGrykAFy3Kr1DGFg_yoWfXr48to\n W5Gw15Ar97GrW7ua4akw4UXFy7Zry2vwn3Jr45Zr47uayfGr47KFn8Jrn5Zay8JrWFkFWf\n XFZ3Grn5tr1a934Dn29KB7ZKAUJUUUU8529EdanIXcx71UUUUU7v73VFW2AGmfu7bjvjm3\n AaLaJ3UjIYCTnIWjp_UUUY37AC8VAFwI0_Wr0E3s1l1xkIjI8I6I8E6xAIw20EY4v20xva\n j40_Wr0E3s1l1IIY67AEw4v_Jr0_Jr4l82xGYIkIc2x26280x7IE14v26r126s0DM28Irc\n Ia0xkI8VCY1x0267AKxVW5JVCq3wA2ocxC64kIII0Yj41l84x0c7CEw4AK67xGY2AK021l\n 84ACjcxK6xIIjxv20xvE14v26ryj6F1UM28EF7xvwVC0I7IYx2IY6xkF7I0E14v26r4UJV\n WxJr1l84ACjcxK6I8E87Iv67AKxVW0oVCq3wA2z4x0Y4vEx4A2jsIEc7CjxVAFwI0_GcCE\n 3s1le2I262IYc4CY6c8Ij28IcVAaY2xG8wAqx4xG64xvF2IEw4CE5I8CrVC2j2WlYx0E2I\n x0cI8IcVAFwI0_Jrv_JF1lYx0Ex4A2jsIE14v26r4j6F4UMcvjeVCFs4IE7xkEbVWUJVW8\n JwACjI8F5VA0II8E6IAqYI8I648v4I1lc7CjxVAaw2AFwI0_Jw0_GFyl4I8I3I0E4IkC6x\n 0Yz7v_Jr0_Gr1lx2IqxVAqx4xG67AKxVWUJVWUGwC20s026x8GjcxK67AKxVWUGVWUWwC2\n zVAF1VAY17CE14v26r1q6r43MIIF0xvE2Ix0cI8IcVAFwI0_Gr0_Xr1lIxAIcVC0I7IYx2\n IY6xkF7I0E14v26r4UJVWxJr1lIxAIcVCF04k26cxKx2IYs7xG6r1j6r1xMIIF0xvEx4A2\n jsIE14v26r4j6F4UMIIF0xvEx4A2jsIEc7CjxVAFwI0_Gr1j6F4UJbIYCTnIWIevJa73Uj\n IFyTuYvjfU5TmhDUUUU",
        "X-Originating-IP": "[36.110.52.2]",
        "X-CM-SenderInfo": "50ld003x6l2u1dvotugofq/",
        "Received-SPF": "pass client-ip=159.226.251.21; envelope-from=xiaoou@iscas.ac.cn;\n helo=cstnet.cn",
        "X-Spam_score_int": "-21",
        "X-Spam_score": "-2.2",
        "X-Spam_bar": "--",
        "X-Spam_report": "(-2.2 / 5.0 requ) BAYES_00=-1.9, HK_RANDOM_ENVFROM=0.998,\n HK_RANDOM_FROM=0.998, RCVD_IN_DNSWL_MED=-2.3,\n RCVD_IN_VALIDITY_RPBL_BLOCKED=0.001, RCVD_IN_VALIDITY_SAFE_BLOCKED=0.001,\n SPF_HELO_PASS=-0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no",
        "X-Spam_action": "no action",
        "X-BeenThere": "qemu-devel@nongnu.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "qemu development <qemu-devel.nongnu.org>",
        "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>",
        "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>",
        "List-Post": "<mailto:qemu-devel@nongnu.org>",
        "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>",
        "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=subscribe>",
        "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org",
        "Sender": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org"
    },
    "content": "Signed-off-by: Molly Chen <xiaoou@iscas.ac.cn>\n---\n target/riscv/helper.h                   |  131 ++\n target/riscv/insn32.decode              |  279 +++\n target/riscv/insn_trans/trans_rvp.c.inc |  786 ++++++++-\n target/riscv/psimd_helper.c             | 2068 +++++++++++++++++++++++\n 4 files changed, 3220 insertions(+), 44 deletions(-)",
    "diff": "diff --git a/target/riscv/helper.h b/target/riscv/helper.h\nindex 663ac0e242..85d4fe1b67 100644\n--- a/target/riscv/helper.h\n+++ b/target/riscv/helper.h\n@@ -1737,3 +1737,134 @@ DEF_HELPER_3(pm4addu_h, i64, env, i64, i64)\n DEF_HELPER_4(pm4adda_h, i64, env, i64, i64, i64)\n DEF_HELPER_4(pm4addasu_h, i64, env, i64, i64, i64)\n DEF_HELPER_4(pm4addau_h, i64, env, i64, i64, i64)\n+\n+/* Packed SIMD - Double-Width Operations (RV32 only, register pairs) */\n+DEF_HELPER_3(pwadd_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwadda_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwaddu_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwaddau_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsub_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwsuba_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsubu_b, i64, env, i32, i32)\n+DEF_HELPER_4(pwsubau_b, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwslli_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwsll_bs, i64, env, i32, i32)\n+DEF_HELPER_3(pwslai_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwsla_bs, i64, env, i32, i32)\n+\n+DEF_HELPER_3(pwadd_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwadda_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwaddu_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwaddau_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsub_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwsuba_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwsubu_h, i64, env, i32, i32)\n+DEF_HELPER_4(pwsubau_h, i64, env, i32, i32, i64)\n+DEF_HELPER_3(pwslli_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwsll_hs, i64, env, i32, i32)\n+DEF_HELPER_3(pwslai_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwsla_hs, i64, env, i32, i32)\n+\n+DEF_HELPER_3(wadd, i64, env, i32, i32)\n+DEF_HELPER_4(wadda, i64, env, i32, i32, i64)\n+DEF_HELPER_3(waddu, i64, env, i32, i32)\n+DEF_HELPER_4(waddau, i64, env, i32, i32, i64)\n+DEF_HELPER_3(wsub, i64, env, i32, i32)\n+DEF_HELPER_4(wsuba, i64, env, i32, i32, i64)\n+DEF_HELPER_3(wsubu, i64, env, i32, i32)\n+DEF_HELPER_4(wsubau, i64, env, i32, i32, i64)\n+DEF_HELPER_3(wslli, i64, env, i32, i32)\n+DEF_HELPER_3(wsll, i64, env, i32, i32)\n+DEF_HELPER_3(wslai, i64, env, i32, i32)\n+DEF_HELPER_3(wsla, i64, env, i32, i32)\n+\n+DEF_HELPER_3(wzip8p, i64, env, i32, i32)\n+DEF_HELPER_3(wzip16p, i64, env, i32, i32)\n+\n+DEF_HELPER_4(predsum_dbs, i32, env, i32, i32, i32)\n+DEF_HELPER_4(predsumu_dbs, i32, env, i32, i32, i32)\n+DEF_HELPER_4(predsum_dhs, i32, env, i32, i32, i32)\n+DEF_HELPER_4(predsumu_dhs, i32, env, i32, i32, i32)\n+\n+DEF_HELPER_3(pnsrli_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrai_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrari_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipi_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipri_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipiu_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipriu_b, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrl_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsra_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrar_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclip_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipr_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipu_bs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipru_bs, i32, env, i64, i32)\n+\n+DEF_HELPER_3(pnsrli_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrai_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrari_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipi_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipri_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipiu_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipriu_h, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrl_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsra_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnsrar_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclip_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipr_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipu_hs, i32, env, i64, i32)\n+DEF_HELPER_3(pnclipru_hs, i32, env, i64, i32)\n+\n+DEF_HELPER_3(nsrli, i32, env, i64, i32)\n+DEF_HELPER_3(nsrai, i32, env, i64, i32)\n+DEF_HELPER_3(nsrari, i32, env, i64, i32)\n+DEF_HELPER_3(nclipi, i32, env, i64, i32)\n+DEF_HELPER_3(nclipri, i32, env, i64, i32)\n+DEF_HELPER_3(nclipiu, i32, env, i64, i32)\n+DEF_HELPER_3(nclipriu, i32, env, i64, i32)\n+DEF_HELPER_3(nsrl, i32, env, i64, i32)\n+DEF_HELPER_3(nsra, i32, env, i64, i32)\n+DEF_HELPER_3(nsrar, i32, env, i64, i32)\n+DEF_HELPER_3(nclip, i32, env, i64, i32)\n+DEF_HELPER_3(nclipr, i32, env, i64, i32)\n+DEF_HELPER_3(nclipu, i32, env, i64, i32)\n+DEF_HELPER_3(nclipru, i32, env, i64, i32)\n+\n+DEF_HELPER_4(pmqwacc_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pmqrwacc_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(mqwacc, i64, env, i32, i32, i64)\n+DEF_HELPER_4(mqrwacc, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(pwmul_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulsu_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulu_b, i64, env, i32, i32)\n+DEF_HELPER_3(pwmul_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulsu_h, i64, env, i32, i32)\n+DEF_HELPER_3(pwmulu_h, i64, env, i32, i32)\n+\n+DEF_HELPER_4(pwmacc_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pwmaccsu_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pwmaccu_h, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(wmul, i64, env, i32, i32)\n+DEF_HELPER_3(wmulsu, i64, env, i32, i32)\n+DEF_HELPER_3(wmulu, i64, env, i32, i32)\n+\n+DEF_HELPER_4(wmacc, i64, env, i32, i32, i64)\n+DEF_HELPER_4(wmaccsu, i64, env, i32, i32, i64)\n+DEF_HELPER_4(wmaccu, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(pm2wadd_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2waddsu_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2waddu_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2wadd_hx, i64, env, i32, i32)\n+DEF_HELPER_4(pm2wadda_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2waddasu_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2waddau_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2wadda_hx, i64, env, i32, i32, i64)\n+\n+DEF_HELPER_3(pm2wsub_h, i64, env, i32, i32)\n+DEF_HELPER_3(pm2wsub_hx, i64, env, i32, i32)\n+DEF_HELPER_4(pm2wsuba_h, i64, env, i32, i32, i64)\n+DEF_HELPER_4(pm2wsuba_hx, i64, env, i32, i32, i64)\ndiff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode\nindex b1bde37de4..7be0b9e5e6 100644\n--- a/target/riscv/insn32.decode\n+++ b/target/riscv/insn32.decode\n@@ -23,6 +23,9 @@\n %rd        7:5\n %sh5       20:5\n %sh6       20:6\n+%rs2_p     21:4\n+%rs1_p     16:4\n+%rd_p      8:4\n \n %sh7    20:7\n %csr    20:12\n@@ -69,6 +72,7 @@\n &mop5 imm rd rs1\n &mop3 imm rd rs1 rs2\n &p_l  imm rd\n+&p_ui imm rs1 rd\n \n # Formats 32:\n @r       .......   ..... ..... ... ..... ....... &r                %rs2 %rs1 %rd\n@@ -101,6 +105,11 @@\n @r2_zimm11 . zimm:11  ..... ... ..... ....... %rs1 %rd\n @r2_zimm10 .. zimm:10  ..... ... ..... ....... %rs1 %rd\n @r2_s    .......   ..... ..... ... ..... ....... %rs2 %rs1\n+@r_p_1       .......   ..... ..... ... ..... ....... &r    %rs2 %rs1 rd=%rd_p\n+@r_p_2     .......   ..... ..... ... ..... ....... &r    rs2=%rs2_p rs1=%rs1_p rd=%rd_p\n+@r_p_3     .......   ..... ..... ... ..... ....... &r    %rs2 rs1=%rs1_p rd=%rd_p\n+@r_p_4     .......   ..... ..... ... ..... ....... &r    %rs2 rs1=%rs1_p %rd\n+@r2_p      .......   ..... ..... ... ..... ....... &r2   rs1=%rs1_p rd=%rd_p\n \n @hfence_gvma ....... ..... .....   ... ..... ....... %rs2 %rs1\n @hfence_vvma ....... ..... .....   ... ..... ....... %rs2 %rs1\n@@ -122,6 +131,18 @@\n @p_l2  ....... .......... ... ..... ....... &p_l      imm=%imm_p_l2         %rd\n @p_l3  ....... .......... ... ..... ....... &p_l      imm=%imm_p_l3         %rd\n @p_l4  ....... .......... ... ..... ....... &p_l      imm=%imm_p_l4         %rd\n+@p_l1_p  ........ ........ .... ..... ....... &p_l    imm=%imm_p_l1         rd=%rd_p\n+@p_l2_p  ........ ........ .... ..... ....... &p_l    imm=%imm_p_l2         rd=%rd_p\n+@p_l3_p  ....... .......... ... ..... ....... &p_l    imm=%imm_p_l3         rd=%rd_p\n+@p_ui8_p ..... .... ... ..... ... ..... .......  &i imm=%imm_p_ui8 rs1=%rs1_p rd=%rd_p\n+@p_ui16_p ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui16 %rs1 rd=%rd_p\n+@p_ui16_p_2 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui16 rs1=%rs1_p rd=%rd_p\n+@p_ui16_p_3 ..... .... ... .... .... ..... ....... &p_ui imm=%imm_p_ui16 rs1=%rs1_p %rd\n+@p_ui32_p ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui32 %rs1 rd=%rd_p\n+@p_ui32_p_2 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui32 rs1=%rs1_p rd=%rd_p\n+@p_ui32_p_3 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui32 rs1=%rs1_p %rd\n+@p_ui64_p ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui64 %rs1 rd=%rd_p\n+@p_ui64_p_2 ..... .... ... ..... ... ..... ....... &p_ui imm=%imm_p_ui64 rs1=%rs1_p %rd\n \n # Formats 64:\n @sh5     .......  ..... .....  ... ..... ....... &shift  shamt=%sh5      %rs1 %rd\n@@ -1612,3 +1633,261 @@ pli_h    1011000 .......... 010 ..... 0011011 @p_l2\n plui_h   1111000 .......... 010 ..... 0011011 @p_l3\n pli_w    1011001 ..... ..... 010 ..... 0011011 @p_l2\n plui_w   1111001 ..... ..... 010 ..... 0011011 @p_l4\n+\n+# Packed SIMD - Double-Width Operations (RV32 only, register pairs)\n+# register-pair destination\n+pwadd_b    0000010 ..... ..... 010 .... 10011011 @r_p_1\n+pwadda_b   0000110 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddu_b   0001010 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddau_b  0001110 ..... ..... 010 .... 10011011 @r_p_1\n+pwsub_b    0100010 ..... ..... 010 .... 10011011 @r_p_1\n+pwsuba_b   0100110 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubu_b   0101010 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubau_b  0101110 ..... ..... 010 .... 10011011 @r_p_1\n+pwslli_b   00000 001.... ..... 010 .... 00011011 @p_ui16_p\n+pwsll_bs   0000100 ..... ..... 010 .... 00011011 @r_p_1\n+pwslai_b   01000 001.... ..... 010 .... 00011011 @p_ui16_p\n+pwsla_bs   0100100 ..... ..... 010 .... 00011011 @r_p_1\n+\n+pwadd_h    0000000 ..... ..... 010 .... 10011011 @r_p_1\n+pwadda_h   0000100 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddu_h   0001000 ..... ..... 010 .... 10011011 @r_p_1\n+pwaddau_h  0001100 ..... ..... 010 .... 10011011 @r_p_1\n+pwsub_h    0100000 ..... ..... 010 .... 10011011 @r_p_1\n+pwsuba_h   0100100 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubu_h   0101000 ..... ..... 010 .... 10011011 @r_p_1\n+pwsubau_h  0101100 ..... ..... 010 .... 10011011 @r_p_1\n+pwslli_h   00000 01..... ..... 010 .... 00011011 @p_ui32_p\n+pwsll_hs   0000101 ..... ..... 010 .... 00011011 @r_p_1\n+pwslai_h   01000 01..... ..... 010 .... 00011011 @p_ui32_p\n+pwsla_hs   0100101 ..... ..... 010 .... 00011011 @r_p_1\n+\n+wadd    0000001 ..... ..... 010 .... 10011011 @r_p_1\n+wadda   0000101 ..... ..... 010 .... 10011011 @r_p_1\n+waddu   0001001 ..... ..... 010 .... 10011011 @r_p_1\n+waddau  0001101 ..... ..... 010 .... 10011011 @r_p_1\n+wsub    0100001 ..... ..... 010 .... 10011011 @r_p_1\n+wsuba   0100101 ..... ..... 010 .... 10011011 @r_p_1\n+wsubu   0101001 ..... ..... 010 .... 10011011 @r_p_1\n+wsubau  0101101 ..... ..... 010 .... 10011011 @r_p_1\n+wslli   00000 1...... ..... 010 .... 00011011 @p_ui64_p\n+wsll    0000111 ..... ..... 010 .... 00011011 @r_p_1\n+wslai   01000 1...... ..... 010 .... 00011011 @p_ui64_p\n+wsla    0100111 ..... ..... 010 .... 00011011 @r_p_1\n+\n+wzip8p    0111100 ..... ..... 010 .... 00011011 @r_p_1\n+wzip16p   0111101 ..... ..... 010 .... 00011011 @r_p_1\n+\n+#register-pair operands\n+pli_db    00110100 ........ 0010 .... 00011011 @p_l1_p\n+padd_db   1000010 .... 0 .... 0110 .... 00011011 @r_p_2\n+psub_db   1100010 .... 0 .... 0110 .... 00011011 @r_p_2\n+psadd_db  1001010 .... 0 .... 0110 .... 00011011 @r_p_2\n+psaddu_db    1011010 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssub_db     1101010 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssubu_db    1111010 .... 0 .... 0110 .... 00011011 @r_p_2\n+paadd_db     1001110 .... 0 .... 0110 .... 00011011 @r_p_2\n+paaddu_db    1011110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasub_db     1101110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasubu_db    1111110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pabd_db      1100110 .... 0 .... 0110 .... 00011011 @r_p_2\n+pabdu_db     1110110 .... 0 .... 0110 .... 00011011 @r_p_2\n+psabs_db     0110010 00111 .... 0110 .... 00011011 @r2_p\n+pli_dh    0011000 .......... 010 .... 00011011 @p_l2_p\n+plui_dh   0111000 .......... 010 .... 00011011 @p_l3_p\n+padd_dh   1000000 .... 0 .... 0110 .... 00011011 @r_p_2\n+psub_dh   1100000 .... 0 .... 0110 .... 00011011 @r_p_2\n+psadd_dh  1001000 .... 0 .... 0110 .... 00011011 @r_p_2\n+psaddu_dh 1011000 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssub_dh  1101000 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssubu_dh 1111000 .... 0 .... 0110 .... 00011011 @r_p_2\n+paadd_dh  1001100 .... 0 .... 0110 .... 00011011 @r_p_2\n+paaddu_dh 1011100 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasub_dh  1101100 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasubu_dh    1111100 .... 0 .... 0110 .... 00011011 @r_p_2\n+psh1add_dh   1010000 .... 1 .... 0110 .... 00011011 @r_p_2\n+pssh1sadd_dh 1011000 .... 1 .... 0110 .... 00011011 @r_p_2\n+pas_dhx    1000000 .... 1 .... 1110 .... 00011011 @r_p_2\n+psa_dhx    1000010 .... 1 .... 1110 .... 00011011 @r_p_2\n+psas_dhx   1001000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pssa_dhx   1001010 .... 1 .... 1110 .... 00011011 @r_p_2\n+paas_dhx   1001100 .... 1 .... 1110 .... 00011011 @r_p_2\n+pasa_dhx   1001110 .... 1 .... 1110 .... 00011011 @r_p_2\n+pabd_dh    1100100 .... 0 .... 0110 .... 00011011 @r_p_2\n+pabdu_dh   1110100 .... 0 .... 0110 .... 00011011 @r_p_2\n+psabs_dh   0110000 00111 .... 0110 .... 00011011 @r2_p\n+padd_dw    1000001 .... 0 .... 0110 .... 00011011 @r_p_2\n+psub_dw    1100001 .... 0 .... 0110 .... 00011011 @r_p_2\n+psadd_dw   1001001 .... 0 .... 0110 .... 00011011 @r_p_2\n+psaddu_dw  1011001 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssub_dw   1101001 .... 0 .... 0110 .... 00011011 @r_p_2\n+pssubu_dw  1111001 .... 0 .... 0110 .... 00011011 @r_p_2\n+paadd_dw   1001101 .... 0 .... 0110 .... 00011011 @r_p_2\n+paaddu_dw  1011101 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasub_dw   1101101 .... 0 .... 0110 .... 00011011 @r_p_2\n+pasubu_dw  1111101 .... 0 .... 0110 .... 00011011 @r_p_2\n+psh1add_dw 1010001 .... 1 .... 0110 .... 00011011 @r_p_2\n+pssh1sadd_dw  1011001 .... 1 .... 0110 .... 00011011 @r_p_2\n+addd_p    1000011 .... 0 .... 0110 .... 00011011 @r_p_2\n+subd_p    1100011 .... 0 .... 0110 .... 00011011 @r_p_2\n+\n+# register-pair first source only\n+predsum_dbs    0001110 ..... .... 0100 ..... 0011011 @r_p_4\n+predsumu_dbs   0011110 ..... .... 0100 ..... 0011011 @r_p_4\n+predsum_dhs    0001100 ..... .... 0100 ..... 0011011 @r_p_4\n+predsumu_dhs   0011100 ..... .... 0100 ..... 0011011 @r_p_4\n+\n+# register-pair operands\n+pslli_db    00000 0001... .... 0110 .... 00011011 @p_ui8_p\n+psrli_db    00000 0001... .... 1110 .... 00011011 @p_ui8_p\n+psrai_db    01000 0001... .... 1110 .... 00011011 @p_ui8_p\n+pmin_db     1110010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pminu_db    1110110 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmax_db     1111010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmaxu_db    1111110 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmseq_db    1100010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmslt_db    1101010 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmsltu_db   1101110 .... 1 .... 1110 .... 00011011 @r_p_2\n+psext_dh_b  0110000 00100 .... 0110 .... 00011011 @r2_p\n+psati_dh    01100 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+pusati_dh   00100 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+pslli_dh    00000 001.... .... 0110 .... 00011011 @p_ui16_p_2\n+psrli_dh    00000 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+psrai_dh    01000 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+psslai_dh   01010 001.... .... 0110 .... 00011011 @p_ui16_p_2\n+psrari_dh   01010 001.... .... 1110 .... 00011011 @p_ui16_p_2\n+pmin_dh     1110000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pminu_dh    1110100 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmax_dh     1111000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmaxu_dh    1111100 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmseq_dh    1100000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmslt_dh    1101000 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmsltu_dh   1101100 .... 1 .... 1110 .... 00011011 @r_p_2\n+psext_dw_b  0110001 00100 .... 0110 .... 00011011 @r2_p\n+psext_dw_h  0110001 00101 .... 0110 .... 00011011 @r2_p\n+psati_dw    01100 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+pusati_dw   00100 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+pslli_dw    00000 01..... .... 0110 .... 00011011 @p_ui32_p_2\n+psrli_dw    00000 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+psrai_dw    01000 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+psslai_dw   01010 01..... .... 0110 .... 00011011 @p_ui32_p_2\n+psrari_dw   01010 01..... .... 1110 .... 00011011 @p_ui32_p_2\n+pmin_dw    1110001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pminu_dw   1110101 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmax_dw    1111001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmaxu_dw   1111101 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmseq_dw    1100001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmslt_dw    1101001 .... 1 .... 1110 .... 00011011 @r_p_2\n+pmsltu_dw   1101101 .... 1 .... 1110 .... 00011011 @r_p_2\n+\n+# register-pair first source and dest\n+padd_dbs    0001110 ..... .... 0110 .... 00011011 @r_p_3\n+psll_dbs    0000110 ..... .... 0110 .... 00011011 @r_p_3\n+psra_dbs    0100110 ..... .... 1110 .... 00011011 @r_p_3\n+padd_dhs    0001100 ..... .... 0110 .... 00011011 @r_p_3\n+psll_dhs    0000100 ..... .... 0110 .... 00011011 @r_p_3\n+psrl_dhs    0000100 ..... .... 1110 .... 00011011 @r_p_3\n+psra_dhs    0100100 ..... .... 1110 .... 00011011 @r_p_3\n+pssha_dhs   0110100 ..... .... 0110 .... 00011011 @r_p_3\n+psshar_dhs  0111100 ..... .... 0110 .... 00011011 @r_p_3\n+padd_dws    0001101 ..... .... 0110 .... 00011011 @r_p_3\n+psll_dws    0000101 ..... .... 0110 .... 00011011 @r_p_3\n+psrl_dws    0000101 ..... .... 1110 .... 00011011 @r_p_3\n+psra_dws    0100101 ..... .... 1110 .... 00011011 @r_p_3\n+pssha_dws   0110101 ..... .... 0110 .... 00011011 @r_p_3\n+psshar_dws  0111101 ..... .... 0110 .... 00011011 @r_p_3\n+\n+# register-pair operands\n+ppaire_db    1000000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppaireo_db  1001000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairoe_db  1010000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairo_db   1011000 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppaire_dh    1000001 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppaireo_dh  1001001 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairoe_dh  1010001 .... 0 .... 1110 .... 00011011 @r_p_2\n+ppairo_dh   1011001 .... 0 .... 1110 .... 00011011 @r_p_2\n+\n+#register-pair first source only\n+pnsrli_b    00000 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnsrai_b    01000 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnsrari_b   01010 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipi_b   01100 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipri_b  01110 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipiu_b  00100 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnclipriu_b 00110 001.... .... 1100 ..... 0011011 @p_ui16_p_3\n+pnsrl_bs    00001 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsra_bs    01001 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsrar_bs   01011 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclip_bs   01101 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipr_bs  01111 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipu_bs  00101 00 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipru_bs 00111 00 ..... .... 1100 ..... 0011011 @r_p_4\n+\n+pnsrli_h    00000 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnsrai_h    01000 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnsrari_h   01010 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipi_h   01100 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipri_h  01110 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipiu_h  00100 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnclipriu_h 00110 01..... .... 1100 ..... 0011011 @p_ui32_p_3\n+pnsrl_hs    00001 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsra_hs    01001 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnsrar_hs   01011 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclip_hs   01101 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipr_hs  01111 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipu_hs  00101 01 ..... .... 1100 ..... 0011011 @r_p_4\n+pnclipru_hs 00111 01 ..... .... 1100 ..... 0011011 @r_p_4\n+\n+nsrli       00000 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nsrai       01000 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nsrari      01010 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipi      01100 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipri     01110 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipiu     00100 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nclipriu    00110 1...... .... 1100 ..... 0011011 @p_ui64_p_2\n+nsrl        00001 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nsra        01001 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nsrar       01011 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclip       01101 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclipr      01111 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclipu      00101 11 ..... .... 1100 ..... 0011011 @r_p_4\n+nclipru     00111 11 ..... .... 1100 ..... 0011011 @r_p_4\n+\n+# register-pair multiply\n+pmqwacc_h       01111 00 ..... ..... 010 .... 10011011 @r_p_1\n+pmqrwacc_h      01111 10 ..... ..... 010 .... 10011011 @r_p_1\n+mqwacc          01111 01 ..... ..... 010 .... 10011011 @r_p_1\n+mqrwacc         01111 11 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pwmul_b         00100 10 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulsu_b       01100 10 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulu_b        00110 10 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pwmul_h         00100 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulsu_h       01100 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmulu_h        00110 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmacc_h        00101 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmaccsu_h      01101 00 ..... ..... 010 .... 10011011 @r_p_1\n+pwmaccu_h       00111 00 ..... ..... 010 .... 10011011 @r_p_1\n+\n+wmul            00100 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmulsu          01100 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmulu           00110 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmacc           00101 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmaccsu         01101 01 ..... ..... 010 .... 10011011 @r_p_1\n+wmaccu          00111 01 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pm2wadd_h       00000 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddsu_h     01100 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddu_h      00100 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wadd_hx      00010 11 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pm2wadda_h      00001 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddasu_h    01101 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2waddau_h     00101 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wadda_hx     00011 11 ..... ..... 010 .... 10011011 @r_p_1\n+\n+pm2wsub_h       01000 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wsub_hx      01010 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wsuba_h      01001 11 ..... ..... 010 .... 10011011 @r_p_1\n+pm2wsuba_hx     01011 11 ..... ..... 010 .... 10011011 @r_p_1\ndiff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc\nindex b82774e00f..ca459293a3 100644\n--- a/target/riscv/insn_trans/trans_rvp.c.inc\n+++ b/target/riscv/insn_trans/trans_rvp.c.inc\n@@ -2,6 +2,38 @@\n /* RISC-V translation routines for the P Standard Extensions. */\n /* Copyright (c) 2026 ISRC ISCAS. */\n \n+/* Save a 64 bit data in src to dst and dst + 1 */\n+static void set_pair_regs(DisasContext *ctx, int dst, TCGv_i64 src)\n+{\n+#if defined(TARGET_RISCV32)\n+    TCGv_i64 tl_64 = tcg_temp_new_i64();\n+    TCGv_i64 th_64 = tcg_temp_new_i64();\n+    TCGv_i32 tl_32 = tcg_temp_new_i32();\n+    TCGv_i32 th_32 = tcg_temp_new_i32();\n+    tcg_gen_extract_i64(tl_64, src, 0, 32);\n+    tcg_gen_extract_i64(th_64, src, 32, 32);\n+    tcg_gen_trunc_i64_tl(tl_32, tl_64);\n+    tcg_gen_trunc_i64_tl(th_32, th_64);\n+    gen_set_gpr(ctx, dst, tl_32);\n+    gen_set_gpr(ctx, dst + 1, th_32);\n+# else\n+    gen_set_gpr(ctx, dst, src);\n+#endif\n+}\n+\n+/* Concat two 32 bit data in src and src + 1 to dst */\n+static void get_pair_regs(DisasContext *ctx, TCGv_i64 dst, int src)\n+{\n+#if defined(TARGET_RISCV32)\n+    TCGv t1 = get_gpr(ctx, src, EXT_NONE);\n+    TCGv t2 = get_gpr(ctx, src + 1, EXT_NONE);\n+    tcg_gen_concat_i32_i64(dst, t1, t2);\n+#else\n+    TCGv t1 = get_gpr(ctx, src, EXT_NONE);\n+    tcg_gen_mov_tl(dst, t1);\n+#endif\n+}\n+\n #define GEN_SIMD_TRANS(NAME)                                \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n@@ -10,7 +42,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n    TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);              \\\n    TCGv dest = dest_gpr(ctx, a->rd);                        \\\n    gen_helper_##NAME(dest, tcg_env, src1, src2);            \\\n-   return true;                                             \\\n+   return true;                                            \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -23,14 +55,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a)  \\\n     TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);             \\\n     TCGv dest = dest_gpr(ctx, a->rd);                       \\\n     gen_helper_##NAME(dest, tcg_env, src1, src2);           \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_32(NAME)                             \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a)  \\\n {                                                           \\\n     REQUIRE_32BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -39,7 +71,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a)  \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a)  \\\n {                                                           \\\n    REQUIRE_64BIT(ctx);                                      \\\n-   return true;                                             \\\n+   return true;                                            \\\n }\n #else\n #define GEN_SIMD_TRANS_64(NAME)                             \\\n@@ -51,7 +83,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a)  \\\n     TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);             \\\n     TCGv dest = dest_gpr(ctx, a->rd);                       \\\n     gen_helper_##NAME(dest, tcg_env, src1, src2);           \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -65,7 +97,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv t = tcg_temp_new();                                \\\n     gen_helper_##NAME(t, tcg_env, src1, src2, dest);        \\\n     gen_set_gpr(ctx, a->rd, t);                             \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -80,14 +112,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv t = tcg_temp_new();                                \\\n     gen_helper_##NAME(t, tcg_env, src1, src2, dest);        \\\n     gen_set_gpr(ctx, a->rd, t);                             \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_ACC_32(NAME)                         \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_32BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -96,7 +128,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_64BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_ACC_64(NAME)                         \\\n@@ -110,7 +142,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv t = tcg_temp_new();                                \\\n     gen_helper_##NAME(t, tcg_env, src1, src2, dest);        \\\n     gen_set_gpr(ctx, a->rd, t);                             \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -122,7 +154,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv dest = dest_gpr(ctx, a->rd);                       \\\n     gen_helper_##NAME(dest, tcg_env, src1);                 \\\n     gen_set_gpr(ctx, a->rd, dest);                          \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -130,7 +162,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_64BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_R1_64(NAME)                          \\\n@@ -141,7 +173,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);             \\\n     TCGv dest = dest_gpr(ctx, a->rd);                       \\\n     gen_helper_##NAME(dest, tcg_env, src1);                 \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -153,7 +185,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv imm = tcg_constant_tl(a->imm);                     \\\n     TCGv dest = dest_gpr(ctx, a->rd);                       \\\n     gen_helper_##NAME(dest, tcg_env, src1, imm);            \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n \n #if defined(TARGET_RISCV32)\n@@ -166,14 +198,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv imm = tcg_constant_tl(a->imm);                     \\\n     TCGv dest = dest_gpr(ctx, a->rd);                       \\\n     gen_helper_##NAME(dest, tcg_env, src1, imm);            \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_IMM_32(NAME)                         \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_32BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -182,7 +214,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_64BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_IMM_64(NAME)                         \\\n@@ -194,7 +226,7 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv imm = tcg_constant_tl(a->imm);                     \\\n     TCGv dest = dest_gpr(ctx, a->rd);                       \\\n     gen_helper_##NAME(dest, tcg_env, src1, imm);            \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -209,14 +241,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv_i64 t = tcg_temp_new_i64();                        \\\n     gen_helper_##NAME(t, tcg_env, src1, src2);              \\\n     set_pair_regs(ctx, (a->rd) * 2, t);                       \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_1(NAME)                     \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_32BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -234,14 +266,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)    \\\n     TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);                  \\\n     gen_helper_##HELPER(dest_0, tcg_env, src1_0, src2_0);      \\\n     gen_helper_##HELPER(dest_1, tcg_env, src1_1, src2_1);      \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_2(INSN, HELPER)                \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)    \\\n {                                                              \\\n     REQUIRE_32BIT(ctx);                                        \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #endif\n \n@@ -257,14 +289,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)    \\\n     TCGv src2   = get_gpr(ctx, a->rs2, EXT_NONE);              \\\n     gen_helper_##HELPER(dest_0, tcg_env, src1_0, src2);        \\\n     gen_helper_##HELPER(dest_1, tcg_env, src1_1, src2);        \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_3(INSN, HELPER)                \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)    \\\n {                                                              \\\n     REQUIRE_32BIT(ctx);                                        \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #endif\n \n@@ -282,14 +314,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)     \\\n     TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);                  \\\n     gen_helper_##HELPER(dest_0, tcg_env, src1_0, src2_0);      \\\n     gen_helper_##HELPER(dest_1, tcg_env, src1_1, src2_1);      \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_DW(INSN, HELPER)               \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)     \\\n {                                                              \\\n     REQUIRE_32BIT(ctx);                                        \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #endif\n \n@@ -307,14 +339,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)     \\\n     TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);                  \\\n     gen_helper_##HELPER(dest_0, tcg_env, src1_0, imm_0);       \\\n     gen_helper_##HELPER(dest_1, tcg_env, src1_1, imm_1);       \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_DW_IMM(INSN, HELPER)           \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)     \\\n {                                                              \\\n     REQUIRE_32BIT(ctx);                                        \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #endif\n \n@@ -332,14 +364,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)     \\\n     TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);                  \\\n     gen_helper_##HELPER##_32(dest_0, tcg_env, src1_0, imm_0);  \\\n     gen_helper_##HELPER##_32(dest_1, tcg_env, src1_1, imm_1);  \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(INSN, HELPER)         \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)     \\\n {                                                              \\\n     REQUIRE_32BIT(ctx);                                        \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #endif\n \n@@ -356,14 +388,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)    \\\n     gen_helper_##HELPER(dest_1, tcg_env, src1_1);              \\\n     gen_set_gpr(ctx, (a->rd) * 2, dest_0);                       \\\n     gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);                     \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_5(INSN, HELPER)                \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)    \\\n {                                                              \\\n     REQUIRE_32BIT(ctx);                                        \\\n-    return true;                                               \\\n+    return true;                                              \\\n }\n #endif\n \n@@ -378,14 +410,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv_i64 t = tcg_temp_new_i64();                        \\\n     gen_helper_##NAME(t, tcg_env, src1, imm);               \\\n     set_pair_regs(ctx, (a->rd) * 2, t);                       \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_IMM(NAME)                   \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_32BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -403,14 +435,14 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)  \\\n     TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);                \\\n     gen_helper_##HELPER(dest_0, tcg_env, src1_0, imm_0);     \\\n     gen_helper_##HELPER(dest_1, tcg_env, src1_1, imm_1);     \\\n-    return true;                                             \\\n+    return true;                                            \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_IMM_2(INSN, HELPER)          \\\n static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a)  \\\n {                                                            \\\n     REQUIRE_32BIT(ctx);                                      \\\n-    return true;                                             \\\n+    return true;                                            \\\n }\n #endif\n \n@@ -430,14 +462,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     }                                                       \\\n     gen_helper_##NAME(t, tcg_env, src1, src2, t);           \\\n     set_pair_regs(ctx, (a->rd) * 2, t);                       \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_ACC_REG_PAIR_1(NAME)                 \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_32BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -461,14 +493,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n         src1_h = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);      \\\n     }                                                       \\\n     gen_helper_##NAME(dest, tcg_env, src1_l, src1_h, src2); \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #else\n #define GEN_SIMD_TRANS_REG_PAIR_PREDSUM(NAME)               \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                           \\\n     REQUIRE_32BIT(ctx);                                     \\\n-    return true;                                            \\\n+    return true;                                           \\\n }\n #endif\n \n@@ -487,14 +519,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv shamt = tcg_constant_tl(a->imm);                  \\\n     TCGv_i32 dest = dest_gpr(ctx, a->rd);                  \\\n     gen_helper_##NAME(dest, tcg_env, s1, shamt);           \\\n-    return true;                                           \\\n+    return true;                                          \\\n }\n #else\n #define GEN_SIMD_TRANS_PN_OP_IMM(NAME)                     \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                          \\\n     REQUIRE_32BIT(ctx);                                    \\\n-    return true;                                           \\\n+    return true;                                          \\\n }\n #endif\n \n@@ -513,14 +545,14 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n     TCGv_i32 rs2 = get_gpr(ctx, a->rs2, EXT_NONE);         \\\n     TCGv_i32 dest = dest_gpr(ctx, a->rd);                  \\\n     gen_helper_##NAME(dest, tcg_env, s1, rs2);             \\\n-    return true;                                           \\\n+    return true;                                          \\\n }\n #else\n #define GEN_SIMD_TRANS_PN_OP_REG(NAME)                     \\\n static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \\\n {                                                          \\\n     REQUIRE_32BIT(ctx);                                    \\\n-    return true;                                           \\\n+    return true;                                          \\\n }\n #endif\n \n@@ -907,6 +939,236 @@ GEN_SIMD_TRANS_ACC_64(pm4adda_h)\n GEN_SIMD_TRANS_ACC_64(pm4addasu_h)\n GEN_SIMD_TRANS_ACC_64(pm4addau_h)\n \n+/* Packed SIMD - Double-Width Operations (RV32 only, register pairs) */\n+GEN_SIMD_TRANS_REG_PAIR_1(pwadd_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwadda_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwaddu_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwaddau_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsub_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsuba_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsubu_b)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsubau_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslli_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsll_bs)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslai_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsla_bs)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pwadd_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwadda_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwaddu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwaddau_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsub_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsuba_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsubu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwsubau_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslli_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsll_hs)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(pwslai_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwsla_hs)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(wadd)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wadda)\n+GEN_SIMD_TRANS_REG_PAIR_1(waddu)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(waddau)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsub)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wsuba)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsubu)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wsubau)\n+\n+GEN_SIMD_TRANS_REG_PAIR_IMM(wslli)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsll)\n+GEN_SIMD_TRANS_REG_PAIR_IMM(wslai)\n+GEN_SIMD_TRANS_REG_PAIR_1(wsla)\n+\n+GEN_SIMD_TRANS_REG_PAIR_2(padd_db, padd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(psub_db, psub_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(psadd_db, psadd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(psaddu_db, psaddu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssub_db, pssub_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssubu_db, pssubu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(paadd_db, paadd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(paaddu_db, paaddu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasub_db, pasub_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasubu_db, pasubu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabd_db, pabd_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabdu_db, pabdu_b)\n+GEN_SIMD_TRANS_REG_PAIR_5(psabs_db, psabs_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(padd_dh, padd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psub_dh, psub_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psadd_dh, psadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psaddu_dh, psaddu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssub_dh, pssub_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssubu_dh, pssubu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(paadd_dh, paadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(paaddu_dh, paaddu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasub_dh, pasub_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasubu_dh, pasubu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(psh1add_dh, psh1add_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssh1sadd_dh, pssh1sadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pas_dhx, pas_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(psa_dhx, psa_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(psas_dhx, psas_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(pssa_dhx, pssa_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(paas_dhx, paas_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(pasa_dhx, pasa_hx)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabd_dh, pabd_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pabdu_dh, pabdu_h)\n+GEN_SIMD_TRANS_REG_PAIR_5(psabs_dh, psabs_h)\n+GEN_SIMD_TRANS_REG_PAIR_DW(psadd_dw, sadd)\n+GEN_SIMD_TRANS_REG_PAIR_DW(psaddu_dw, saddu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssub_dw, ssub)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssubu_dw, ssubu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(paadd_dw, aadd)\n+GEN_SIMD_TRANS_REG_PAIR_DW(paaddu_dw, aaddu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pasub_dw, asub)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pasubu_dw, asubu)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssh1sadd_dw, ssh1sadd)\n+\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(pslli_db, pslli_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrli_db, psrli_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrai_db, psrai_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmin_db, pmin_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pminu_db, pminu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmax_db, pmax_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmaxu_db, pmaxu_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmseq_db, pmseq_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmslt_db, pmslt_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmsltu_db, pmsltu_b)\n+GEN_SIMD_TRANS_REG_PAIR_5(psext_dh_b, psext_h_b)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psati_dh, psati_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(pusati_dh, pusati_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(pslli_dh, pslli_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrli_dh, psrli_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrai_dh, psrai_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psslai_dh, psslai_h)\n+GEN_SIMD_TRANS_REG_PAIR_IMM_2(psrari_dh, psrari_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmin_dh, pmin_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pminu_dh, pminu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmax_dh, pmax_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmaxu_dh, pmaxu_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmseq_dh, pmseq_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmslt_dh, pmslt_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(pmsltu_dh, pmsltu_h)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(psati_dw, sati)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(pusati_dw, usati)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM(psslai_dw, sslai)\n+GEN_SIMD_TRANS_REG_PAIR_DW_IMM_2(psrari_dw, srari)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pmseq_dw, mseq)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pmslt_dw, mslt)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pmsltu_dw, msltu)\n+\n+GEN_SIMD_TRANS_REG_PAIR_3(padd_dbs, padd_bs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psll_dbs, psll_bs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psra_dbs, psra_bs)\n+GEN_SIMD_TRANS_REG_PAIR_3(padd_dhs, padd_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psll_dhs, psll_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psrl_dhs, psrl_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psra_dhs, psra_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(pssha_dhs, pssha_hs)\n+GEN_SIMD_TRANS_REG_PAIR_3(psshar_dhs, psshar_hs)\n+GEN_SIMD_TRANS_REG_PAIR_DW(pssha_dws, ssha)\n+GEN_SIMD_TRANS_REG_PAIR_DW(psshar_dws, sshar)\n+\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairo_db, ppairo_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairo_dh, ppairo_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppaire_db, ppaire_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppaireo_db, ppaireo_b)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppaireo_dh, ppaireo_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairoe_dh, ppairoe_h)\n+GEN_SIMD_TRANS_REG_PAIR_2(ppairoe_db, ppairoe_b)\n+\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsum_dbs)\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsumu_dbs)\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsum_dhs)\n+GEN_SIMD_TRANS_REG_PAIR_PREDSUM(predsumu_dhs)\n+\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrli_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrai_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrari_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipi_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipri_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipiu_b)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipriu_b)\n+\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrli_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrai_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnsrari_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipi_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipri_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipiu_h)\n+GEN_SIMD_TRANS_PN_OP_IMM(pnclipriu_h)\n+\n+GEN_SIMD_TRANS_PN_OP_IMM(nsrli)\n+GEN_SIMD_TRANS_PN_OP_IMM(nsrai)\n+GEN_SIMD_TRANS_PN_OP_IMM(nsrari)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipi)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipri)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipiu)\n+GEN_SIMD_TRANS_PN_OP_IMM(nclipriu)\n+\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrl_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsra_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrar_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclip_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipr_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipu_bs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipru_bs)\n+\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrl_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsra_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnsrar_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclip_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipr_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipu_hs)\n+GEN_SIMD_TRANS_PN_OP_REG(pnclipru_hs)\n+\n+GEN_SIMD_TRANS_PN_OP_REG(nsrl)\n+GEN_SIMD_TRANS_PN_OP_REG(nsra)\n+GEN_SIMD_TRANS_PN_OP_REG(nsrar)\n+GEN_SIMD_TRANS_PN_OP_REG(nclip)\n+GEN_SIMD_TRANS_PN_OP_REG(nclipr)\n+GEN_SIMD_TRANS_PN_OP_REG(nclipu)\n+GEN_SIMD_TRANS_PN_OP_REG(nclipru)\n+\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pmqwacc_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pmqrwacc_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(mqwacc)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(mqrwacc)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmul_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulsu_b)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulu_b)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmul_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulsu_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pwmulu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwmacc_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwmaccsu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pwmaccu_h)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(wmul)\n+GEN_SIMD_TRANS_REG_PAIR_1(wmulsu)\n+GEN_SIMD_TRANS_REG_PAIR_1(wmulu)\n+\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wmacc)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wmaccsu)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(wmaccu)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wadd_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2waddsu_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2waddu_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wadd_hx)\n+\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wadda_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2waddasu_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2waddau_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wadda_hx)\n+\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wsub_h)\n+GEN_SIMD_TRANS_REG_PAIR_1(pm2wsub_hx)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wsuba_h)\n+GEN_SIMD_TRANS_ACC_REG_PAIR_1(pm2wsuba_hx)\n+\n static bool trans_pli_b(DisasContext *ctx, arg_pli_b * a)\n {\n     REQUIRE_EXT(ctx, RVP);\n@@ -973,3 +1235,439 @@ static bool trans_plui_w(DisasContext *ctx, arg_plui_w * a)\n     gen_set_gpri(ctx, a->rd, imm);\n     return true;\n }\n+\n+static bool trans_pli_db(DisasContext *ctx, arg_pli_db * a)\n+{\n+    REQUIRE_EXT(ctx, RVP);\n+    int i = 1;\n+    target_long imm = a->imm;\n+    while (i < TARGET_LONG_SIZE) {\n+        imm = ((imm << 8) + a->imm);\n+        i++;\n+    }\n+    gen_set_gpri(ctx, (a->rd) * 2, imm);\n+    gen_set_gpri(ctx, (a->rd) * 2 + 1, imm);\n+    return true;\n+}\n+\n+static bool trans_pli_dh(DisasContext *ctx, arg_pli_dh * a)\n+{\n+    REQUIRE_EXT(ctx, RVP);\n+    int i = 1;\n+    target_long imm = a->imm;\n+    while (i < TARGET_LONG_SIZE / 2) {\n+        imm = (imm << 16) + (a->imm & 0xFFFF);\n+        i++;\n+    }\n+    gen_set_gpri(ctx, (a->rd) * 2, imm);\n+    gen_set_gpri(ctx, (a->rd) * 2 + 1, imm);\n+    return true;\n+}\n+\n+static bool trans_plui_dh(DisasContext *ctx, arg_plui_dh * a)\n+{\n+    REQUIRE_EXT(ctx, RVP);\n+    int i = 1;\n+    target_long imm = a->imm;\n+    while (i < TARGET_LONG_SIZE / 2) {\n+        imm = (imm << 16) + (a->imm & 0xFFFF);\n+        i++;\n+    }\n+    gen_set_gpri(ctx, (a->rd) * 2, imm);\n+    gen_set_gpri(ctx, (a->rd) * 2 + 1, imm);\n+    return true;\n+}\n+\n+static bool trans_padd_dw(DisasContext *ctx, arg_padd_dw * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+    TCGv src2_0 = get_gpr(ctx, (a->rs2) * 2, EXT_NONE);\n+    TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+    TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+    TCGv src2_1 = get_gpr(ctx, (a->rs2) * 2 + 1, EXT_NONE);\n+    TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+    tcg_gen_add_tl(dest_0, src1_0, src2_0);\n+    tcg_gen_add_tl(dest_1, src1_1, src2_1);\n+    gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+    gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+    return true;\n+}\n+\n+static bool trans_psub_dw(DisasContext *ctx, arg_psub_dw * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+    TCGv src2_0 = get_gpr(ctx, (a->rs2) * 2, EXT_NONE);\n+    TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+    TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+    TCGv src2_1 = get_gpr(ctx, (a->rs2) * 2 + 1, EXT_NONE);\n+    TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+    tcg_gen_sub_tl(dest_0, src1_0, src2_0);\n+    tcg_gen_sub_tl(dest_1, src1_1, src2_1);\n+    gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+    gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+    return true;\n+}\n+\n+static bool trans_psh1add_dw(DisasContext *ctx, arg_psh1add_dw * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+    TCGv src2_0 = get_gpr(ctx, (a->rs2) * 2, EXT_NONE);\n+    TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+    TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+    TCGv src2_1 = get_gpr(ctx, (a->rs2) * 2 + 1, EXT_NONE);\n+    TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+    gen_sh1add(dest_0, src1_0, src2_0);\n+    gen_sh1add(dest_1, src1_1, src2_1);\n+    gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+    gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+    return true;\n+}\n+\n+/* Verify rd is not zero register for wzip8p and wzip16p. */\n+#if defined(TARGET_RISCV32)\n+static bool trans_wzip8p(DisasContext *ctx, arg_wzip8p * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv_i32 src1 = get_gpr(ctx, a->rs1, EXT_NONE);\n+    TCGv_i32 src2 = get_gpr(ctx, a->rs2, EXT_NONE);\n+    TCGv_i64 t = tcg_temp_new_i64();\n+    if (a->rd == 0) {\n+        return true;\n+    } else {\n+        get_pair_regs(ctx, t, (a->rd) * 2);\n+    }\n+    gen_helper_wzip8p(t, tcg_env, src1, src2);\n+    set_pair_regs(ctx, (a->rd) * 2, t);\n+    return true;\n+}\n+#else\n+static bool trans_wzip8p(DisasContext *ctx, arg_wzip8p * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    return true;\n+}\n+#endif\n+\n+#if defined(TARGET_RISCV32)\n+static bool trans_wzip16p(DisasContext *ctx, arg_wzip16p * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv_i32 src1 = get_gpr(ctx, a->rs1, EXT_NONE);\n+    TCGv_i32 src2 = get_gpr(ctx, a->rs2, EXT_NONE);\n+    TCGv_i64 t = tcg_temp_new_i64();\n+    if (a->rd == 0) {\n+        return true;\n+    } else {\n+        get_pair_regs(ctx, t, (a->rd) * 2);\n+    }\n+    gen_helper_wzip16p(t, tcg_env, src1, src2);\n+    set_pair_regs(ctx, (a->rd) * 2, t);\n+    return true;\n+}\n+#else\n+static bool trans_wzip16p(DisasContext *ctx, arg_wzip16p * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    return true;\n+}\n+#endif\n+\n+static bool trans_addd_p(DisasContext *ctx, arg_addd_p * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv_i64 src1 = tcg_temp_new_i64();\n+    TCGv_i64 src2 = tcg_temp_new_i64();\n+    TCGv_i64 dest = tcg_temp_new_i64();\n+    get_pair_regs(ctx, src1, (a->rs1) * 2);\n+    get_pair_regs(ctx, src2, (a->rs2) * 2);\n+    get_pair_regs(ctx, dest, (a->rd) * 2);\n+    tcg_gen_add_i64(dest, src1, src2);\n+    set_pair_regs(ctx, (a->rd) * 2, dest);\n+\n+    return true;\n+}\n+\n+static bool trans_subd_p(DisasContext *ctx, arg_subd_p * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv_i64 src1 = tcg_temp_new_i64();\n+    TCGv_i64 src2 = tcg_temp_new_i64();\n+    TCGv_i64 dest = tcg_temp_new_i64();\n+    get_pair_regs(ctx, src1, (a->rs1) * 2);\n+    get_pair_regs(ctx, src2, (a->rs2) * 2);\n+    get_pair_regs(ctx, dest, (a->rd) * 2);\n+    tcg_gen_sub_i64(dest, src1, src2);\n+    set_pair_regs(ctx, (a->rd) * 2, dest);\n+\n+    return true;\n+}\n+\n+static bool trans_psext_dw_b(DisasContext *ctx, arg_psext_dw_b * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+    TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+    TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+    TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+\n+    tcg_gen_ext8s_tl(dest_0, src1_0);\n+    gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+\n+    tcg_gen_ext8s_tl(dest_1, src1_1);\n+    gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+\n+    return true;\n+}\n+\n+static bool trans_psext_dw_h(DisasContext *ctx, arg_psext_dw_h * a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    TCGv dest_0 = dest_gpr(ctx, (a->rd) * 2);\n+    TCGv src1_0 = get_gpr(ctx, (a->rs1) * 2, EXT_NONE);\n+    TCGv dest_1 = dest_gpr(ctx, (a->rd) * 2 + 1);\n+    TCGv src1_1 = get_gpr(ctx, (a->rs1) * 2 + 1, EXT_NONE);\n+\n+    tcg_gen_ext16s_tl(dest_0, src1_0);\n+    gen_set_gpr(ctx, (a->rd) * 2, dest_0);\n+\n+    tcg_gen_ext16s_tl(dest_1, src1_1);\n+    gen_set_gpr(ctx, (a->rd) * 2 + 1, dest_1);\n+\n+    return true;\n+}\n+\n+static bool trans_pslli_dw(DisasContext *ctx, arg_pslli_dw *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    arg_shift a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.shamt = a->imm;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.shamt = a->imm;\n+\n+    gen_shift_imm_fn(ctx, &a0, EXT_NONE, tcg_gen_shli_tl, NULL);\n+    gen_shift_imm_fn(ctx, &a1, EXT_NONE, tcg_gen_shli_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_psrli_dw(DisasContext *ctx, arg_psrli_dw *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    arg_shift a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.shamt = a->imm;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.shamt = a->imm;\n+\n+    gen_shift_imm_fn_per_ol(ctx, &a0, EXT_NONE, tcg_gen_shri_tl,\n+                            gen_srliw, NULL);\n+    gen_shift_imm_fn_per_ol(ctx, &a1, EXT_NONE, tcg_gen_shri_tl,\n+                            gen_srliw, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_psrai_dw(DisasContext *ctx, arg_psrai_dw *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    arg_shift a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.shamt = a->imm;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.shamt = a->imm;\n+\n+    gen_shift_imm_fn_per_ol(ctx, &a0, EXT_NONE, tcg_gen_sari_tl,\n+                            gen_sraiw, NULL);\n+    gen_shift_imm_fn_per_ol(ctx, &a1, EXT_NONE, tcg_gen_sari_tl,\n+                            gen_sraiw, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_pmin_dw(DisasContext *ctx, arg_pmin_dw *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    REQUIRE_ZBB(ctx);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = (a->rs2) * 2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = (a->rs2) * 2 + 1;\n+\n+    gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_smin_tl, NULL);\n+    gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_smin_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_pminu_dw(DisasContext *ctx, arg_pminu_dw *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    REQUIRE_ZBB(ctx);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = (a->rs2) * 2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = (a->rs2) * 2 + 1;\n+\n+    gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_umin_tl, NULL);\n+    gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_umin_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_pmax_dw(DisasContext *ctx, arg_pmax_dw *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    REQUIRE_ZBB(ctx);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = (a->rs2) * 2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = (a->rs2) * 2 + 1;\n+\n+    gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_smax_tl, NULL);\n+    gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_smax_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_pmaxu_dw(DisasContext *ctx, arg_pmaxu_dw *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    REQUIRE_ZBB(ctx);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = (a->rs2) * 2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = (a->rs2) * 2 + 1;\n+\n+    gen_arith(ctx, &a0, EXT_SIGN, tcg_gen_umax_tl, NULL);\n+    gen_arith(ctx, &a1, EXT_SIGN, tcg_gen_umax_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_padd_dws(DisasContext *ctx, arg_padd_dws *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = a->rs2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = a->rs2;\n+\n+    gen_arith(ctx, &a0, EXT_NONE, tcg_gen_add_tl, NULL);\n+    gen_arith(ctx, &a1, EXT_NONE, tcg_gen_add_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_psll_dws(DisasContext *ctx, arg_psll_dws *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = a->rs2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = a->rs2;\n+\n+    gen_shift(ctx, &a0, EXT_NONE, tcg_gen_shl_tl, NULL);\n+    gen_shift(ctx, &a1, EXT_NONE, tcg_gen_shl_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_psrl_dws(DisasContext *ctx, arg_psrl_dws *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = a->rs2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = a->rs2;\n+\n+    gen_shift(ctx, &a0, EXT_ZERO, tcg_gen_shr_tl, NULL);\n+    gen_shift(ctx, &a1, EXT_ZERO, tcg_gen_shr_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_psra_dws(DisasContext *ctx, arg_psra_dws *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = a->rs2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = a->rs2;\n+\n+    gen_shift(ctx, &a0, EXT_SIGN, tcg_gen_sar_tl, NULL);\n+    gen_shift(ctx, &a1, EXT_SIGN, tcg_gen_sar_tl, NULL);\n+\n+    return true;\n+}\n+\n+static bool trans_ppaire_dh(DisasContext *ctx, arg_ppaire_dh *a)\n+{\n+    REQUIRE_32BIT(ctx);\n+    REQUIRE_EXT(ctx, RVP);\n+    REQUIRE_ZBKB(ctx);\n+    arg_r a0, a1;\n+    a0.rd = (a->rd) * 2;\n+    a0.rs1 = (a->rs1) * 2;\n+    a0.rs2 = (a->rs2) * 2;\n+    a1.rd = (a->rd) * 2 + 1;\n+    a1.rs1 = (a->rs1) * 2 + 1;\n+    a1.rs2 = (a->rs2) * 2 + 1;\n+\n+    gen_arith(ctx, &a0, EXT_NONE, gen_pack, NULL);\n+    gen_arith(ctx, &a1, EXT_NONE, gen_pack, NULL);\n+    return true;\n+}\ndiff --git a/target/riscv/psimd_helper.c b/target/riscv/psimd_helper.c\nindex 5eede48581..4c91800128 100644\n--- a/target/riscv/psimd_helper.c\n+++ b/target/riscv/psimd_helper.c\n@@ -7012,3 +7012,2071 @@ uint64_t HELPER(pm4addau_h)(CPURISCVState *env, uint64_t rs1,\n     uint64_t prod3 = (uint64_t)s1_h3 * (uint64_t)s2_h3;\n     return d + prod0 + prod1 + prod2 + prod3;\n }\n+\n+/* Double-Width Operations (RV32 only, register pairs) */\n+\n+/**\n+ * PWADD.B - Packed widening byte to halfword addition (RV32)\n+ * rd_pair = {rs1[31:24]+rs2[31:24], rs1[23:16]+rs2[23:16],\n+ *            rs1[15:8]+rs2[15:8], rs1[7:0]+rs2[7:0]} (sign-extended)\n+ */\n+uint64_t HELPER(pwadd_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+        int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+        int16_t res = e1 + e2;\n+        rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWADDA.B - Packed widening byte to halfword addition with accumulate (RV32)\n+ * rd_pair += {rs1[i] + rs2[i]}\n+ */\n+uint64_t HELPER(pwadda_b)(CPURISCVState *env, uint32_t rs1,\n+                          uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+        int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+        int16_t acc = (int16_t)((rd >> (i * 16)) & 0xFFFF);\n+        int16_t res = acc + e1 + e2;\n+        result |= ((uint64_t)(uint16_t)res) << (i * 16);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWADDU.B - Packed widening byte to halfword unsigned addition (RV32)\n+ */\n+uint64_t HELPER(pwaddu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+        uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+        uint16_t res = e1 + e2;\n+        rd |= ((uint64_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWADDAU.B - Packed widening byte to halfword unsigned addition\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwaddau_b)(CPURISCVState *env, uint32_t rs1,\n+                           uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+        uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+        uint16_t acc = (uint16_t)((rd >> (i * 16)) & 0xFFFF);\n+        uint16_t res = acc + e1 + e2;\n+        result |= ((uint64_t)res) << (i * 16);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWSUB.B - Packed widening byte to halfword subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsub_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+        int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+        int16_t res = e1 - e2;\n+        rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSUBA.B - Packed widening byte to halfword subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsuba_b)(CPURISCVState *env, uint32_t rs1,\n+                          uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+        int16_t e2 = (int8_t)((rs2 >> (i * 8)) & 0xFF);\n+        int16_t acc = (int16_t)((rd >> (i * 16)) & 0xFFFF);\n+        int16_t res = acc + (e1 - e2);\n+        result |= ((uint64_t)(uint16_t)res) << (i * 16);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWSUBU.B - Packed widening byte to halfword unsigned subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsubu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+        uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+        uint16_t res = e1 - e2;\n+        rd |= ((uint64_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSUBAU.B - Packed widening byte to halfword unsigned subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsubau_b)(CPURISCVState *env, uint32_t rs1,\n+                           uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+        uint16_t e2 = (uint8_t)((rs2 >> (i * 8)) & 0xFF);\n+        uint16_t acc = (uint16_t)((rd >> (i * 16)) & 0xFFFF);\n+        uint16_t res = acc + (e1 - e2);\n+        result |= ((uint64_t)res) << (i * 16);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWSLLI.B - Packed widening shift left immediate (byte to halfword)\n+ */\n+uint64_t HELPER(pwslli_b)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = imm & 0x0F;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+        uint16_t res = e1 << shamt;\n+        rd |= ((uint64_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSLL.BS - Packed widening shift left from register (byte to halfword)\n+ */\n+uint64_t HELPER(pwsll_bs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = rs2 & 0x1F;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t e1 = (uint8_t)((rs1 >> (i * 8)) & 0xFF);\n+        uint16_t res = e1 << shamt;\n+        rd |= ((uint64_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSLAI.B - Packed widening signed shift left immediate (byte to halfword)\n+ */\n+uint64_t HELPER(pwslai_b)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = imm & 0x0F;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+        int16_t res = e1 << shamt;\n+        rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSLA.BS - Packed widening signed shift left from register (byte to halfword)\n+ */\n+uint64_t HELPER(pwsla_bs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = rs2 & 0x1F;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int16_t e1 = (int8_t)((rs1 >> (i * 8)) & 0xFF);\n+        int16_t res = e1 << shamt;\n+        rd |= ((uint64_t)(uint16_t)res) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWADD.H - Packed widening halfword to word addition (RV32)\n+ */\n+uint64_t HELPER(pwadd_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        int32_t res = e1 + e2;\n+        rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWADDA.H - Packed widening halfword to word addition with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwadda_h)(CPURISCVState *env, uint32_t rs1,\n+                          uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        int32_t acc = (int32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+        int32_t res = acc + e1 + e2;\n+        result |= ((uint64_t)(uint32_t)res) << (i * 32);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWADDU.H - Packed widening halfword to word unsigned addition (RV32)\n+ */\n+uint64_t HELPER(pwaddu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        uint32_t res = e1 + e2;\n+        rd |= ((uint64_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWADDAU.H - Packed widening halfword to word unsigned addition\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwaddau_h)(CPURISCVState *env, uint32_t rs1,\n+                           uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        uint32_t acc = (uint32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+        uint32_t res = acc + e1 + e2;\n+        result |= ((uint64_t)res) << (i * 32);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWSUB.H - Packed widening halfword to word subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsub_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        int32_t res = e1 - e2;\n+        rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSUBA.H - Packed widening halfword to word subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsuba_h)(CPURISCVState *env, uint32_t rs1,\n+                          uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        int32_t e2 = (int16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        int32_t acc = (int32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+        int32_t res = acc + (e1 - e2);\n+        result |= ((uint64_t)(uint32_t)res) << (i * 32);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWSUBU.H - Packed widening halfword to word unsigned subtraction (RV32)\n+ */\n+uint64_t HELPER(pwsubu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        uint32_t res = e1 - e2;\n+        rd |= ((uint64_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSUBAU.H - Packed widening halfword to word unsigned subtraction\n+ * with accumulate (RV32)\n+ */\n+uint64_t HELPER(pwsubau_h)(CPURISCVState *env, uint32_t rs1,\n+                           uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t result = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        uint32_t e2 = (uint16_t)((rs2 >> (i * 16)) & 0xFFFF);\n+        uint32_t acc = (uint32_t)((rd >> (i * 32)) & 0xFFFFFFFF);\n+        uint32_t res = acc + (e1 - e2);\n+        result |= ((uint64_t)res) << (i * 32);\n+    }\n+\n+    return result;\n+}\n+\n+/**\n+ * PWSLLI.H - Packed widening shift left immediate (halfword to word)\n+ */\n+uint64_t HELPER(pwslli_h)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = imm & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        uint32_t res = e1 << shamt;\n+        rd |= ((uint64_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSLL.HS - Packed widening shift left from register (halfword to word)\n+ */\n+uint64_t HELPER(pwsll_hs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = rs2 & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t e1 = (uint16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        uint32_t res = e1 << shamt;\n+        rd |= ((uint64_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSLAI.H - Packed widening signed shift left immediate (halfword to word)\n+ */\n+uint64_t HELPER(pwslai_h)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = imm & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        int32_t res = e1 << shamt;\n+        rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PWSLA.HS - Packed widening signed shift left from register (halfword to word)\n+ */\n+uint64_t HELPER(pwsla_hs)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+    uint8_t shamt = rs2 & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int32_t e1 = (int16_t)((rs1 >> (i * 16)) & 0xFFFF);\n+        int32_t res = e1 << shamt;\n+        rd |= ((uint64_t)(uint32_t)res) << (i * 32);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * WADD - Widening signed addition (RV32)\n+ */\n+uint64_t HELPER(wadd)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int64_t a = (int32_t)rs1;\n+    int64_t b = (int32_t)rs2;\n+    return (uint64_t)(a + b);\n+}\n+\n+/**\n+ * WADDA - Widening signed addition with accumulate (RV32)\n+ */\n+uint64_t HELPER(wadda)(CPURISCVState *env, uint32_t rs1,\n+                       uint32_t rs2, uint64_t rd)\n+{\n+    int64_t a = (int32_t)rs1;\n+    int64_t b = (int32_t)rs2;\n+    int64_t acc = (int64_t)rd;\n+    return (uint64_t)(acc + a + b);\n+}\n+\n+/**\n+ * WADDU - Widening unsigned addition (RV32)\n+ */\n+uint64_t HELPER(waddu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t a = rs1;\n+    uint64_t b = rs2;\n+    return a + b;\n+}\n+\n+/**\n+ * WADDAU - Widening unsigned addition with accumulate (RV32)\n+ */\n+uint64_t HELPER(waddau)(CPURISCVState *env, uint32_t rs1,\n+                        uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t acc = rd;\n+    return acc + rs1 + rs2;\n+}\n+\n+/**\n+ * WSUB - Widening signed subtraction (RV32)\n+ */\n+uint64_t HELPER(wsub)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int64_t a = (int32_t)rs1;\n+    int64_t b = (int32_t)rs2;\n+    return (uint64_t)(a - b);\n+}\n+\n+/**\n+ * WSUBA - Widening signed subtraction with accumulate (RV32)\n+ */\n+uint64_t HELPER(wsuba)(CPURISCVState *env, uint32_t rs1,\n+                       uint32_t rs2, uint64_t rd)\n+{\n+    int64_t a = (int32_t)rs1;\n+    int64_t b = (int32_t)rs2;\n+    int64_t acc = (int64_t)rd;\n+    return (uint64_t)(acc + a - b);\n+}\n+\n+/**\n+ * WSUBU - Widening unsigned subtraction (RV32)\n+ */\n+uint64_t HELPER(wsubu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t a = rs1;\n+    uint64_t b = rs2;\n+    return a - b;\n+}\n+\n+/**\n+ * WSUBAU - Widening unsigned subtraction with accumulate (RV32)\n+ */\n+uint64_t HELPER(wsubau)(CPURISCVState *env, uint32_t rs1,\n+                        uint32_t rs2, uint64_t rd)\n+{\n+    uint64_t acc = rd;\n+    return acc + rs1 - rs2;\n+}\n+\n+/**\n+ * WSLLI - Widening logical shift left immediate (RV32)\n+ */\n+uint64_t HELPER(wslli)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+    uint64_t a = rs1;\n+    uint8_t shamt = imm & 0x3F;\n+    return a << shamt;\n+}\n+\n+/**\n+ * WSLL - Widening logical shift left from register (RV32)\n+ */\n+uint64_t HELPER(wsll)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t a = rs1;\n+    uint8_t shamt = rs2 & 0x3F;\n+    return a << shamt;\n+}\n+\n+/**\n+ * WSLAI - Widening signed shift left immediate (RV32)\n+ */\n+uint64_t HELPER(wslai)(CPURISCVState *env, uint32_t rs1, uint32_t imm)\n+{\n+    int64_t a = (int32_t)rs1;\n+    uint8_t shamt = imm & 0x3F;\n+    return (uint64_t)(a << shamt);\n+}\n+\n+/**\n+ * WSLA - Widening signed shift left from register (RV32)\n+ */\n+uint64_t HELPER(wsla)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int64_t a = (int32_t)rs1;\n+    uint8_t shamt = rs2 & 0x3F;\n+    return (uint64_t)(a << shamt);\n+}\n+\n+/**\n+ * WZIP8P - Double-width interleave bytes (RV32)\n+ */\n+uint64_t HELPER(wzip8p)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint64_t b1 = (uint64_t)EXTRACT8(rs1, i) << 16 * i;\n+        uint64_t b2 = (uint64_t)EXTRACT8(rs2, i) << (16 * i + 8);\n+        rd = rd | b2 | b1;\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * WZIP16P - Double-width interleave halfwords (RV32)\n+ */\n+uint64_t HELPER(wzip16p)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint64_t h1 = (uint64_t)EXTRACT16(rs1, i) << (32 * i);\n+        uint64_t h2 = (uint64_t)EXTRACT16(rs2, i) << (32 * i + 16);\n+        rd = rd | h2 | h1;\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PREDSUM.DBS - Double-width signed reduction sum of bytes (RV32)\n+ */\n+uint32_t HELPER(predsum_dbs)(CPURISCVState *env, uint32_t rs1_lo,\n+                             uint32_t rs1_hi, uint32_t rs2)\n+{\n+    int64_t sum = (int32_t)rs2;\n+    int64_t s1 = ((int64_t)rs1_hi << 32) | rs1_lo;\n+\n+    for (int i = 0; i < 8; i++) {\n+        int8_t b = (int8_t)((s1 >> (i * 8)) & 0xFF);\n+        sum += b;\n+    }\n+\n+    return (uint32_t)sum;\n+}\n+\n+/**\n+ * PREDSUMU.DBS - Double-width unsigned reduction sum of bytes (RV32)\n+ */\n+uint32_t HELPER(predsumu_dbs)(CPURISCVState *env, uint32_t rs1_lo,\n+                              uint32_t rs1_hi, uint32_t rs2)\n+{\n+    uint64_t sum = rs2;\n+    uint64_t s1 = ((uint64_t)rs1_hi << 32) | rs1_lo;\n+\n+    for (int i = 0; i < 8; i++) {\n+        uint8_t b = (uint8_t)((s1 >> (i * 8)) & 0xFF);\n+        sum += b;\n+    }\n+\n+    return (uint32_t)sum;\n+}\n+\n+/**\n+ * PREDSUM.DHS - Double-width signed reduction sum of halfwords (RV32)\n+ */\n+uint32_t HELPER(predsum_dhs)(CPURISCVState *env, uint32_t rs1_lo,\n+                             uint32_t rs1_hi, uint32_t rs2)\n+{\n+    int64_t sum = (int32_t)rs2;\n+    int64_t s1 = ((int64_t)rs1_hi << 32) | rs1_lo;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int16_t h = (int16_t)((s1 >> (i * 16)) & 0xFFFF);\n+        sum += h;\n+    }\n+\n+    return (uint32_t)sum;\n+}\n+\n+/**\n+ * PREDSUMU.DHS - Double-width unsigned reduction sum of halfwords (RV32)\n+ */\n+uint32_t HELPER(predsumu_dhs)(CPURISCVState *env, uint32_t rs1_lo,\n+                              uint32_t rs1_hi, uint32_t rs2)\n+{\n+    uint64_t sum = rs2;\n+    uint64_t s1 = ((uint64_t)rs1_hi << 32) | rs1_lo;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t h = (uint16_t)((s1 >> (i * 16)) & 0xFFFF);\n+        sum += h;\n+    }\n+\n+    return (uint32_t)sum;\n+}\n+\n+\n+/* Narrowing Operations (RV32 only, register pair sources) */\n+\n+/**\n+ * PNSRLI.B - Narrowing logical shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrli_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        uint8_t result = (s1_h >> (shamt & 0xF)) & 0xFF;\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNSRL.BS - Narrowing logical shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrl_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        uint32_t s1_h_z32 = (uint32_t)s1_h;\n+        uint8_t result = (s1_h_z32 >> (shamt & 0x1F)) & 0xFF;\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNSRAI.B - Narrowing arithmetic shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrai_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+        int32_t s1_h_s24 = (s1_h_s32 << 8) >> 8;\n+        uint8_t result = s1_h_s24 >> (shamt & 0xF) & 0xFF;\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNSRA.BS - Narrowing arithmetic shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsra_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+        s1_h_s64 = (s1_h_s64 << 24) >> 24;\n+        uint8_t result = s1_h_s64 >> (shamt & 0x1F) & 0xFF;\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNSRARI.B - Narrowing arithmetic shift right with rounding\n+ * immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrari_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+        int32_t s1_h_s24 = (s1_h_s32 << 8) >> 8;\n+        uint32_t shx_25bit = ((uint32_t)s1_h_s24 << 1);\n+        uint32_t shx = (shx_25bit >> (shamt & 0xF)) & 0x1FF;\n+        uint8_t result = ((shx + 1) >> 1) & 0xFF;\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNSRAR.BS - Narrowing arithmetic shift right with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnsrar_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+        int64_t s1_h_s40 = (s1_h_s64 << 24) >> 24;\n+        uint64_t shx_41bit = ((uint64_t)s1_h_s40 << 1);\n+        uint64_t shx = (shx_41bit >> (shamt & 0x1F)) & 0x1FF;\n+        uint8_t result = ((shx + 1) >> 1) & 0xFF;\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPI.B - Narrowing clip signed (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipi_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+        int16_t shx = (int16_t)(s1_h_s32 >> (shamt & 0xF));\n+        uint8_t result = 0;\n+\n+        if (shx < -128) {\n+            sat = 1;\n+            result = 0x80; /* -128 */\n+        } else if (shx > 127) {\n+            sat = 1;\n+            result = 0x7F; /* 127 */\n+        } else {\n+            result = (uint8_t)shx;\n+        }\n+        rd |= ((uint32_t)result << (i * 8));\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPRI.B - Narrowing clip signed with rounding\n+ * (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipri_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int32_t s1_h_s32 = (int32_t)(int16_t)s1_h;\n+        uint64_t shx_33bit = ((uint32_t)s1_h_s32 << 1);\n+        uint32_t shx = (shx_33bit >> (shamt & 0xF)) & 0x1FFFF;\n+        uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+        int16_t round_shx_s = (int16_t)round_shx;\n+        uint8_t result = 0;\n+\n+        if (round_shx_s < -128) {\n+            sat = 1;\n+            result = 0x80;\n+        } else if (round_shx_s > 127) {\n+            sat = 1;\n+            result = 0x7F;\n+        } else {\n+            result = (uint8_t)round_shx;\n+        }\n+\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPIU.B - Narrowing clip unsigned (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipiu_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        uint16_t shx = s1_h >> (shamt & 0xF);\n+        uint8_t result = 0;\n+\n+        if (shx > 0x00FF) {\n+            sat = 1;\n+            result = 0xFF;\n+        } else {\n+            result = (uint8_t)(shx & 0xFF);\n+        }\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPRIU.B - Narrowing clip unsigned with rounding\n+ * (64-bit to 32-bit) with immediate shift\n+ */\n+uint32_t HELPER(pnclipriu_b)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        uint32_t shx_17bit = ((uint32_t)s1_h << 1);\n+        uint32_t shx = shx_17bit >> (shamt & 0xF);\n+        uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+        uint8_t result = 0;\n+\n+        if (round_shx > 0x00FF) {\n+            sat = 1;\n+            result = 0xFF;\n+        } else {\n+            result = (uint8_t)(round_shx & 0xFF);\n+        }\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIP.BS - Narrowing clip signed from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclip_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+        int64_t s1_h_s48 = (s1_h_s64 << 16) >> 16;\n+        int16_t shx = (int16_t)(s1_h_s48 >> (shamt & 0x1F));\n+        uint8_t result = 0;\n+\n+        if (shx < -128) {\n+            sat = 1;\n+            result = 0x80;\n+        } else if (shx > 127) {\n+            sat = 1;\n+            result = 0x7F;\n+        } else {\n+            result = (uint8_t)shx;\n+        }\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPR.BS - Narrowing clip signed with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclipr_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        int64_t s1_h_s64 = (int64_t)(int16_t)s1_h;\n+        int64_t s1_h_s48 = (s1_h_s64 << 16) >> 16;\n+        uint64_t shx_49bit = ((uint64_t)s1_h_s48 << 1);\n+        uint32_t shx = (shx_49bit >> (shamt & 0x1F)) & 0x1FFFF;\n+        uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+        int16_t round_shx_s = (int16_t)round_shx;\n+        uint8_t result = 0;\n+\n+        if (round_shx_s < -128) {\n+            sat = 1;\n+            result = 0x80;\n+        } else if (round_shx_s > 127) {\n+            sat = 1;\n+            result = 0x7F;\n+        } else {\n+            result = (uint8_t)round_shx;\n+        }\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPU.BS - Narrowing clip unsigned from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclipu_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        uint32_t s1_h_z32 = (uint32_t)s1_h;\n+        uint16_t shx = (s1_h_z32 >> (shamt & 0x1F)) & 0xFFFF;\n+        uint8_t result = 0;\n+\n+        if (shx > 0x00FF) {\n+            sat = 1;\n+            result = 0xFF;\n+        } else {\n+            result = (uint8_t)(shx & 0xFF);\n+        }\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPRU.BS - Narrowing clip unsigned with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(pnclipru_bs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint16_t s1_h = (s1 >> (i * 16)) & 0xFFFF;\n+        uint32_t s1_h_z32 = (uint32_t)s1_h;\n+        uint64_t shx_33bit = ((uint64_t)s1_h_z32 << 1);\n+        uint32_t shx = (shx_33bit >> (shamt & 0x1F)) & 0x1FFFF;\n+        uint16_t round_shx = (uint16_t)((shx + 1) >> 1);\n+        uint8_t result = 0;\n+\n+        if (round_shx > 0x00FF) {\n+            sat = 1;\n+            result = 0xFF;\n+        } else {\n+            result = (uint8_t)(round_shx & 0xFF);\n+        }\n+        rd |= ((uint32_t)result) << (i * 8);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNSRLI.H - Narrowing logical shift right immediate\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrli_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    uint32_t s1_low  = (uint32_t)(s1 & 0xFFFFFFFF);\n+    uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+\n+    uint16_t rd_low  = (s1_low  >> (shamt & 0x1F)) & 0xFFFF;\n+    uint16_t rd_high = (s1_high >> (shamt & 0x1F)) & 0xFFFF;\n+\n+    rd = ((uint32_t)rd_high << 16) | rd_low;\n+    return rd;\n+}\n+\n+/**\n+ * PNSRAI.H - Narrowing arithmetic shift right immediate\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrai_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    uint32_t s1_low  = (uint32_t)(s1 & 0xFFFFFFFF);\n+    int64_t s1_low_s64 = (int64_t)(int32_t)s1_low;\n+    int64_t s1_low_s48 = (s1_low_s64 << 16) >> 16;\n+\n+    uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+    int64_t s1_high_s64 = (int64_t)(int32_t)s1_high;\n+    int64_t s1_high_s48 = (s1_high_s64 << 16) >> 16;\n+\n+    uint16_t rd_low  = (s1_low_s48  >> (shamt & 0x1F)) & 0xFFFF;\n+    uint16_t rd_high = (s1_high_s48 >> (shamt & 0x1F)) & 0xFFFF;\n+\n+    rd = ((uint32_t)rd_high << 16) | rd_low;\n+    return rd;\n+}\n+\n+/**\n+ * PNSRARI.H - Narrowing arithmetic shift right with rounding\n+ * immediate (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrari_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t s1_w = (s1 >> (i * 32)) & 0xFFFFFFFF;\n+        int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+        int64_t s1_w_s48 = (s1_w_s64 << 16) >> 16;\n+        uint64_t shx_49bit = ((uint64_t)s1_w_s48 << 1);\n+        uint32_t shx = (shx_49bit >> (shamt & 0x1F)) & 0x1FFFF;\n+        rd |= ((uint16_t)((shx + 1) >> 1)) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNSRL.HS - Narrowing logical shift right from register\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrl_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    uint32_t s1_low  = (uint32_t)(s1 & 0xFFFFFFFF);\n+    uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+\n+    uint16_t rd_low  = (s1_low  >> (shamt & 0x1F)) & 0xFFFF;\n+    uint16_t rd_high = (s1_high >> (shamt & 0x1F)) & 0xFFFF;\n+\n+    rd = ((uint32_t)rd_high << 16) | rd_low;\n+    return rd;\n+}\n+\n+/**\n+ * PNSRA.HS - Narrowing arithmetic shift right from register\n+ * (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsra_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    uint32_t s1_low  = (uint32_t)(s1 & 0xFFFFFFFF);\n+    uint32_t s1_high = (uint32_t)((s1 >> 32) & 0xFFFFFFFF);\n+\n+    uint16_t rd_low  = (s1_low  >> (shamt & 0x1F)) & 0xFFFF;\n+    uint16_t rd_high = (s1_high >> (shamt & 0x1F)) & 0xFFFF;\n+\n+    rd = ((uint32_t)rd_high << 16) | rd_low;\n+    return rd;\n+}\n+\n+/**\n+ * PNSRAR.HS - Narrowing arithmetic shift right with rounding\n+ * from register (64-bit to 32-bit, word to halfword)\n+ */\n+uint32_t HELPER(pnsrar_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t s1_w = (s1 >> (i * 32)) & 0xFFFFFFFF;\n+        int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+        int64_t s1_w_s48 = (s1_w_s64 << 16) >> 16;\n+        uint64_t shx_49bit = ((uint64_t)s1_w_s48 << 1);\n+        uint32_t shx = (shx_49bit >> (shamt & 0x1F)) & 0x1FFFF;\n+        rd |= ((uint16_t)((shx + 1) >> 1)) << (i * 16);\n+    }\n+\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIP.HS - Narrowing signed clip from register shift (word to halfword)\n+ * For each word: arithmetic right shift, clip to signed 16-bit\n+ *   shx = (int32_t)rs1[i] >> shamt\n+ *   result = sat16(shx)\n+ */\n+uint32_t HELPER(pnclip_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+    uint8_t shift = shamt & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t s1_w = EXTRACT32(s1, i);\n+        int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+        int32_t shx = (int32_t)(s1_w_s64 >> shift);\n+        uint16_t result;\n+\n+        if (shx < -32768) {\n+            sat = 1;\n+            result = 0x8000;\n+        } else if (shx > 32767) {\n+            sat = 1;\n+            result = 0x7FFF;\n+        } else {\n+            result = (uint16_t)shx;\n+        }\n+\n+        rd = INSERT16(rd, result, i);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPR.HS - Narrowing signed clip with rounding\n+ * from register (word to halfword)\n+ * For each word: ((int32_t)rs1[i] << 1) >> shamt, round, clip to signed 16-bit\n+ *   shx_65bit = ((int64_t)rs1[i] << 1)\n+ *   shx = (shx_65bit >> shamt) & mask\n+ *   round = (shx + 1) >> 1\n+ *   result = sat16(round)\n+ */\n+uint32_t HELPER(pnclipr_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+    uint8_t shift = shamt & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t s1_w = EXTRACT32(s1, i);\n+        int64_t s1_w_s64 = (int64_t)(int32_t)s1_w;\n+        __uint128_t shx_65bit = (__uint128_t)s1_w_s64 << 1;\n+        uint64_t shx = (uint64_t)(shx_65bit >> shift) & 0x1FFFFFFFF;\n+        int32_t round_shx = (int32_t)((shx + 1) >> 1);\n+        uint16_t result;\n+\n+        if (round_shx < -32768) {\n+            sat = 1;\n+            result = 0x8000;\n+        } else if (round_shx > 32767) {\n+            sat = 1;\n+            result = 0x7FFF;\n+        } else {\n+            result = (uint16_t)round_shx;\n+        }\n+\n+        rd = INSERT16(rd, result, i);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPI.H - Narrowing signed clip from immediate shift (word to halfword)\n+ * For each word: rs1[i] >> imm, clip to signed 16-bit\n+ */\n+uint32_t HELPER(pnclipi_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    return HELPER(pnclip_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * PNCLIPRI.H - Narrowing signed clip with rounding\n+ * from immediate shift (word to halfword)\n+ * For each word: (rs1[i] << 1) >> imm, round, clip to signed 16-bit\n+ */\n+uint32_t HELPER(pnclipri_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    return HELPER(pnclipr_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * PNCLIPU.HS - Narrowing unsigned clip from register shift (word to halfword)\n+ * For each word: shift right, clip to unsigned 16-bit\n+ *   shx = rs1[i] >> shamt\n+ *   result = (shx > 65535) ? 0xFFFF : shx\n+ */\n+uint32_t HELPER(pnclipu_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+    uint8_t shift = shamt & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t s1_w = EXTRACT32(s1, i);\n+        uint32_t shx = s1_w >> shift;\n+        uint16_t result;\n+\n+        if (shx > 65535) {\n+            sat = 1;\n+            result = 0xFFFF;\n+        } else {\n+            result = (uint16_t)shx;\n+        }\n+\n+        rd = INSERT16(rd, result, i);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPRU.HS - Narrowing unsigned clip with rounding\n+ * from register (word to halfword)\n+ * For each word: (rs1[i] << 1) >> shamt, round, clip to unsigned 16-bit\n+ *   shx = ((rs1[i] << 1) >> shamt)\n+ *   round = (shx + 1) >> 1\n+ *   result = (round > 65535) ? 0xFFFF : round\n+ */\n+uint32_t HELPER(pnclipru_hs)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint32_t rd = 0;\n+    int sat = 0;\n+    uint8_t shift = shamt & 0x1F;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint32_t s1_w = EXTRACT32(s1, i);\n+        uint64_t shx_33bit = (uint64_t)s1_w << 1;\n+        uint64_t shx = shx_33bit >> shift;\n+        uint32_t round_shx = (uint32_t)((shx + 1) >> 1);\n+        uint16_t result;\n+\n+        if (round_shx > 65535) {\n+            sat = 1;\n+            result = 0xFFFF;\n+        } else {\n+            result = (uint16_t)round_shx;\n+        }\n+\n+        rd = INSERT16(rd, result, i);\n+    }\n+\n+    if (sat) {\n+        env->vxsat = 1;\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PNCLIPIU.H - Narrowing unsigned clip from immediate shift (word to halfword)\n+ * For each word: rs1[i] >> imm, clip to unsigned 16-bit\n+ */\n+uint32_t HELPER(pnclipiu_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    return HELPER(pnclipu_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * PNCLIPRIU.H - Narrowing unsigned clip with rounding\n+ * from immediate shift (word to halfword)\n+ * For each word: (rs1[i] << 1) >> imm, round, clip to unsigned 16-bit\n+ */\n+uint32_t HELPER(pnclipriu_h)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    return HELPER(pnclipru_hs)(env, s1, shamt);\n+}\n+\n+/**\n+ * NSRLI - Narrowing logical shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrli)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    return (s1 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRAI - Narrowing arithmetic shift right immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrai)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+    return (uint32_t)(s1_s96 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRARI - Narrowing arithmetic shift right with rounding\n+ * immediate (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrari)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+    __uint128_t shx_97bit = ((__uint128_t)s1_s96 << 1);\n+    uint64_t shx = (uint64_t)(shx_97bit >> (shamt & 0x3F)) & 0x1FFFFFFFF;\n+    return (uint32_t)((shx + 1) >> 1);\n+}\n+\n+/**\n+ * NSRL - Narrowing logical shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrl)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    return (s1 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRA - Narrowing arithmetic shift right from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsra)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+    return (uint32_t)(s1_s96 >> (shamt & 0x3F)) & 0xFFFFFFFF;\n+}\n+\n+/**\n+ * NSRAR - Narrowing arithmetic shift right with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nsrar)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    __int128_t s1_s96 = (s1_s128 << 32) >> 32;\n+    __uint128_t shx_97bit = ((__uint128_t)s1_s96 << 1);\n+    uint64_t shx = (uint64_t)(shx_97bit >> (shamt & 0x3F)) & 0x1FFFFFFFF;\n+    return (uint32_t)((shx + 1) >> 1);\n+}\n+\n+/**\n+ * NCLIPI - Narrowing clip signed with immediate shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipi)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    int64_t shx = (int64_t)(s1_s128 >> (shamt & 0x3F));\n+\n+    if (shx < -2147483648LL) {\n+        env->vxsat = 1;\n+        return 0x80000000U;\n+    } else if (shx > 2147483647LL) {\n+        env->vxsat = 1;\n+        return 0x7FFFFFFFU;\n+    } else {\n+        return (uint32_t)(shx & 0xFFFFFFFF);\n+    }\n+}\n+\n+/**\n+ * NCLIPRI - Narrowing clip signed with rounding and immediate\n+ * shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipri)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    typedef struct {\n+        __uint128_t low;\n+        uint8_t high;\n+    } Uint129;\n+\n+    Uint129 left_shift_1(__int128_t s1_s128)\n+    {\n+        Uint129 result;\n+        __uint128_t us1 = (__uint128_t)s1_s128;\n+        result.low = us1 << 1;\n+        result.high = (us1 >> 127) & 0x1;\n+        return result;\n+    }\n+\n+    Uint129 right_shift(Uint129 val, uint32_t smt)\n+    {\n+        Uint129 result;\n+        if (smt == 0) {\n+            return val;\n+        } else if (smt >= 129) {\n+            result.low = 0;\n+            result.high = 0;\n+        } else if (smt == 128) {\n+            result.low = val.high;\n+            result.high = 0;\n+        } else {\n+            result.low = (val.low >> smt) |\n+                         ((__uint128_t)val.high << (128 - smt));\n+            result.high = (val.high >> smt);\n+        }\n+        return result;\n+    }\n+\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    Uint129 shx_129bit = left_shift_1(s1_s128);\n+    Uint129 shx = right_shift(shx_129bit, shamt & 0x3F);\n+    int64_t round_shx = (int64_t)((shx.low + 1) >> 1);\n+\n+    if (round_shx < -2147483648LL) {\n+        env->vxsat = 1;\n+        return 0x80000000U;\n+    } else if (round_shx > 2147483647LL) {\n+        env->vxsat = 1;\n+        return 0x7FFFFFFFU;\n+    } else {\n+        return (uint32_t)round_shx;\n+    }\n+}\n+\n+/**\n+ * NCLIPIU - Narrowing clip unsigned with immediate shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipiu)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint64_t shx = s1 >> (shamt & 0x3F);\n+\n+    if (shx > 4294967295ULL) {\n+        env->vxsat = 1;\n+        return 0xFFFFFFFFU;\n+    } else {\n+        return (uint32_t)(shx & 0xFFFFFFFF);\n+    }\n+}\n+\n+/**\n+ * NCLIPRIU - Narrowing clip unsigned with rounding and immediate\n+ * shift (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipriu)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __uint128_t shx_65bit = (s1 << 1);\n+    __uint128_t shx = shx_65bit >> (shamt & 0x3F);\n+    uint64_t round_shx = (shx + 1) >> 1;\n+\n+    if (round_shx > 4294967295ULL) {\n+        env->vxsat = 1;\n+        return 0xFFFFFFFFU;\n+    } else {\n+        return (uint32_t)(round_shx & 0xFFFFFFFF);\n+    }\n+}\n+\n+/**\n+ * NCLIP - Narrowing clip signed from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclip)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    int64_t shx = (int64_t)(s1_s128 >> (shamt & 0x3F));\n+\n+    if (shx < -2147483648LL) {\n+        env->vxsat = 1;\n+        return 0x80000000U;\n+    } else if (shx > 2147483647LL) {\n+        env->vxsat = 1;\n+        return 0x7FFFFFFFU;\n+    } else {\n+        return (uint32_t)(shx & 0xFFFFFFFF);\n+    }\n+}\n+\n+/**\n+ * NCLIPR - Narrowing clip signed with rounding from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipr)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    typedef struct {\n+        __uint128_t low;\n+        uint8_t high;\n+    } Uint129;\n+\n+    Uint129 left_shift_1(__int128_t s1_s128)\n+    {\n+        Uint129 result;\n+        __uint128_t us1 = (__uint128_t)s1_s128;\n+        result.low = us1 << 1;\n+        result.high = (us1 >> 127) & 0x1;\n+        return result;\n+    }\n+\n+    Uint129 right_shift(Uint129 val, uint32_t smt)\n+    {\n+        Uint129 result;\n+        if (smt == 0) {\n+            return val;\n+        } else if (smt >= 129) {\n+            result.low = 0;\n+            result.high = 0;\n+        } else if (smt == 128) {\n+            result.low = val.high;\n+            result.high = 0;\n+        } else {\n+            result.low = (val.low >> smt) |\n+                         ((__uint128_t)val.high << (128 - smt));\n+            result.high = (val.high >> smt);\n+        }\n+        return result;\n+    }\n+\n+    __int128_t s1_s128 = (__int128_t)((int64_t)s1);\n+    Uint129 shx_129bit = left_shift_1(s1_s128);\n+    Uint129 shx = right_shift(shx_129bit, shamt & 0x3F);\n+    int64_t round_shx = (int64_t)((shx.low + 1) >> 1);\n+\n+    if (round_shx < -2147483648LL) {\n+        env->vxsat = 1;\n+        return 0x80000000U;\n+    } else if (round_shx > 2147483647LL) {\n+        env->vxsat = 1;\n+        return 0x7FFFFFFFU;\n+    } else {\n+        return (uint32_t)round_shx;\n+    }\n+}\n+\n+/**\n+ * NCLIPU - Narrowing clip unsigned from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipu)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    uint64_t shx = s1 >> (shamt & 0x3F);\n+\n+    if (shx > 4294967295ULL) {\n+        env->vxsat = 1;\n+        return 0xFFFFFFFFU;\n+    } else {\n+        return (uint32_t)(shx & 0xFFFFFFFF);\n+    }\n+}\n+\n+/**\n+ * NCLIPRU - Narrowing clip unsigned with rounding\n+ * from register (64-bit to 32-bit)\n+ */\n+uint32_t HELPER(nclipru)(CPURISCVState *env, uint64_t s1, uint32_t shamt)\n+{\n+    __uint128_t shx_65bit = (s1 << 1);\n+    __uint128_t shx = shx_65bit >> (shamt & 0x3F);\n+    uint64_t round_shx = (shx + 1) >> 1;\n+\n+    if (round_shx > 4294967295ULL) {\n+        env->vxsat = 1;\n+        return 0xFFFFFFFFU;\n+    } else {\n+        return (uint32_t)(round_shx & 0xFFFFFFFF);\n+    }\n+}\n+\n+/* Multiplication with Even-Odd Register Pairs as Destination (RV32 only) */\n+\n+/**\n+ * PMQWACC.H - Packed Q-format halfword to word multiply accumulate\n+ */\n+uint64_t HELPER(pmqwacc_h)(CPURISCVState *env, uint32_t rs1,\n+                           uint32_t rs2, uint64_t dest)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int16_t s1_h = (int16_t)EXTRACT16(rs1, i * 2);\n+        int16_t s2_h = (int16_t)EXTRACT16(rs2, i * 2);\n+        int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+        int64_t prod = (int64_t)s1_h * (int64_t)s2_h;\n+        uint32_t res = (uint32_t)(d_w + (int32_t)(prod >> 15));\n+        rd = INSERT32(rd, res, i);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PMQRWACC.H - Packed Q-format halfword to word multiply\n+ * accumulate with rounding\n+ */\n+uint64_t HELPER(pmqrwacc_h)(CPURISCVState *env, uint32_t rs1,\n+                            uint32_t rs2, uint64_t dest)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int16_t s1_h = (int16_t)EXTRACT16(rs1, i * 2);\n+        int16_t s2_h = (int16_t)EXTRACT16(rs2, i * 2);\n+        int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+        int64_t prod = (int64_t)s1_h * (int64_t)s2_h + (1LL << 14);\n+        uint32_t res = (uint32_t)(d_w + (int32_t)(prod >> 15));\n+        rd = INSERT32(rd, res, i);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMUL.B - Widening byte to halfword multiplication\n+ */\n+uint64_t HELPER(pwmul_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int8_t s1_b = (int8_t)EXTRACT8(rs1, i);\n+        int8_t s2_b = (int8_t)EXTRACT8(rs2, i);\n+        int16_t prod = (int16_t)s1_b * (int16_t)s2_b;\n+        rd |= ((uint64_t)(uint16_t)prod) << (i * 16);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMULSU.B - Widening signed x unsigned byte to halfword multiplication\n+ */\n+uint64_t HELPER(pwmulsu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        int8_t s1_b = (int8_t)EXTRACT8(rs1, i);\n+        uint8_t s2_b = EXTRACT8(rs2, i);\n+        int16_t prod = (int16_t)s1_b * (uint16_t)s2_b;\n+        rd |= ((uint64_t)(uint16_t)prod) << (i * 16);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMULU.B - Widening unsigned byte to halfword multiplication\n+ */\n+uint64_t HELPER(pwmulu_b)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 4; i++) {\n+        uint8_t s1_b = EXTRACT8(rs1, i);\n+        uint8_t s2_b = EXTRACT8(rs2, i);\n+        uint16_t prod = (uint16_t)s1_b * (uint16_t)s2_b;\n+        rd |= ((uint64_t)prod) << (i * 16);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMUL.H - Widening halfword to word multiplication\n+ */\n+uint64_t HELPER(pwmul_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+        int16_t s2_h = (int16_t)EXTRACT16(rs2, i);\n+        int32_t prod = (int32_t)s1_h * (int32_t)s2_h;\n+        rd |= ((uint64_t)(uint32_t)prod) << (i * 32);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMULSU.H - Widening signed x unsigned halfword to word multiplication\n+ */\n+uint64_t HELPER(pwmulsu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+        uint16_t s2_h = EXTRACT16(rs2, i);\n+        int32_t prod = (int32_t)s1_h * (uint32_t)s2_h;\n+        rd |= ((uint64_t)(uint32_t)prod) << (i * 32);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMULU.H - Widening unsigned halfword to word multiplication\n+ */\n+uint64_t HELPER(pwmulu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint16_t s1_h = EXTRACT16(rs1, i);\n+        uint16_t s2_h = EXTRACT16(rs2, i);\n+        uint32_t prod = (uint32_t)s1_h * (uint32_t)s2_h;\n+        rd |= ((uint64_t)prod) << (i * 32);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMACC.H - Widening multiply accumulate (halfword to word)\n+ */\n+uint64_t HELPER(pwmacc_h)(CPURISCVState *env, uint32_t rs1,\n+                          uint32_t rs2, uint64_t dest)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+        int16_t s2_h = (int16_t)EXTRACT16(rs2, i);\n+        int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+        int32_t prod = (int32_t)s1_h * (int32_t)s2_h;\n+        uint32_t res = (uint32_t)(d_w + prod);\n+        rd |= ((uint64_t)res) << (i * 32);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMACCSU.H - Widening signed x unsigned multiply\n+ * accumulate (halfword to word)\n+ */\n+uint64_t HELPER(pwmaccsu_h)(CPURISCVState *env, uint32_t rs1,\n+                            uint32_t rs2, uint64_t dest)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        int16_t s1_h = (int16_t)EXTRACT16(rs1, i);\n+        uint16_t s2_h = EXTRACT16(rs2, i);\n+        int32_t d_w = (int32_t)EXTRACT32(dest, i);\n+        int32_t prod = (int32_t)s1_h * (uint32_t)s2_h;\n+        uint32_t res = (uint32_t)(d_w + prod);\n+        rd |= ((uint64_t)res) << (i * 32);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * PWMACCU.H - Widening unsigned multiply accumulate (halfword to word)\n+ */\n+uint64_t HELPER(pwmaccu_h)(CPURISCVState *env, uint32_t rs1,\n+                           uint32_t rs2, uint64_t dest)\n+{\n+    uint64_t rd = 0;\n+\n+    for (int i = 0; i < 2; i++) {\n+        uint16_t s1_h = EXTRACT16(rs1, i);\n+        uint16_t s2_h = EXTRACT16(rs2, i);\n+        uint32_t d_w = EXTRACT32(dest, i);\n+        uint32_t prod = (uint32_t)s1_h * (uint32_t)s2_h;\n+        uint32_t res = d_w + prod;\n+        rd |= ((uint64_t)res) << (i * 32);\n+    }\n+    return rd;\n+}\n+\n+/**\n+ * MQWACC - Q-format word multiply accumulate\n+ */\n+uint64_t HELPER(mqwacc)(CPURISCVState *env, uint32_t rs1,\n+                        uint32_t rs2, uint64_t dest)\n+{\n+    int64_t s1 = (int64_t)(int32_t)rs1;\n+    int64_t s2 = (int64_t)(int32_t)rs2;\n+    int64_t d = (int64_t)dest;\n+    __int128_t prod = (__int128_t)s1 * (__int128_t)s2;\n+    return (uint64_t)(d + (int64_t)(prod >> 31));\n+}\n+\n+/**\n+ * MQRWACC - Q-format word multiply accumulate with rounding\n+ */\n+uint64_t HELPER(mqrwacc)(CPURISCVState *env, uint32_t rs1,\n+                         uint32_t rs2, uint64_t dest)\n+{\n+    int64_t s1 = (int64_t)(int32_t)rs1;\n+    int64_t s2 = (int64_t)(int32_t)rs2;\n+    int64_t d = (int64_t)dest;\n+    __int128_t prod = (__int128_t)s1 * (__int128_t)s2 + (1LL << 30);\n+    return (uint64_t)(d + (int64_t)(prod >> 31));\n+}\n+\n+/**\n+ * WMUL - Widening signed multiplication (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmul)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    return (uint64_t)((int64_t)(int32_t)rs1 * (int64_t)(int32_t)rs2);\n+}\n+\n+/**\n+ * WMULSU - Widening signed x unsigned multiplication (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmulsu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    return (uint64_t)((int64_t)(int32_t)rs1 * (uint64_t)rs2);\n+}\n+\n+/**\n+ * WMULU - Widening unsigned multiplication (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmulu)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    return (uint64_t)rs1 * (uint64_t)rs2;\n+}\n+\n+/**\n+ * WMACC - Widening multiply accumulate signed (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmacc)(CPURISCVState *env, uint32_t rs1,\n+                       uint32_t rs2, uint64_t dest)\n+{\n+    return (uint64_t)((int64_t)(int32_t)rs1 *\n+                      (int64_t)(int32_t)rs2 + (int64_t)dest);\n+}\n+\n+/**\n+ * WMACCSU - Widening multiply accumulate signed x unsigned\n+ * (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmaccsu)(CPURISCVState *env, uint32_t rs1,\n+                         uint32_t rs2, uint64_t dest)\n+{\n+    return (uint64_t)((int64_t)(int32_t)rs1 * (uint64_t)rs2 + (int64_t)dest);\n+}\n+\n+/**\n+ * WMACCU - Widening multiply accumulate unsigned (32-bit to 64-bit, RV32)\n+ */\n+uint64_t HELPER(wmaccu)(CPURISCVState *env, uint32_t rs1,\n+                        uint32_t rs2, uint64_t dest)\n+{\n+    return (uint64_t)rs1 * (uint64_t)rs2 + (uint64_t)dest;\n+}\n+\n+/**\n+ * PM2WADD.H - Add two widening products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadd_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+    int64_t prod0 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+    int64_t prod1 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+    return (uint64_t)(prod0 + prod1);\n+}\n+\n+/**\n+ * PM2WADDSU.H - Add two widening products\n+ * (signed x unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddsu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    uint16_t s2_h0 = EXTRACT16(rs2, 0);\n+    uint16_t s2_h1 = EXTRACT16(rs2, 1);\n+    int64_t prod0 = (int64_t)s1_h0 * (uint64_t)s2_h0;\n+    int64_t prod1 = (int64_t)s1_h1 * (uint64_t)s2_h1;\n+    return (uint64_t)(prod0 + prod1);\n+}\n+\n+/**\n+ * PM2WADDU.H - Add two widening products (unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddu_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    uint16_t s1_h0 = EXTRACT16(rs1, 0);\n+    uint16_t s1_h1 = EXTRACT16(rs1, 1);\n+    uint16_t s2_h0 = EXTRACT16(rs2, 0);\n+    uint16_t s2_h1 = EXTRACT16(rs2, 1);\n+    uint64_t prod0 = (uint64_t)s1_h0 * (uint64_t)s2_h0;\n+    uint64_t prod1 = (uint64_t)s1_h1 * (uint64_t)s2_h1;\n+    return prod0 + prod1;\n+}\n+\n+/**\n+ * PM2WADDA.H - Add two widening products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadda_h)(CPURISCVState *env, uint32_t rs1,\n+                            uint32_t rs2, uint64_t dest)\n+{\n+    int16_t s1_h0 = EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = EXTRACT16(rs2, 1);\n+    int64_t d_h = (int64_t)dest;\n+    int64_t mul_00 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+    int64_t mul_11 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+    return (uint64_t)(d_h + mul_00 + mul_11);\n+}\n+\n+/**\n+ * PM2WADDASU.H - Add two widening products with accumulate\n+ * (signed x unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddasu_h)(CPURISCVState *env, uint32_t rs1,\n+                              uint32_t rs2, uint64_t dest)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    uint16_t s2_h0 = (uint16_t)EXTRACT16(rs2, 0);\n+    uint16_t s2_h1 = (uint16_t)EXTRACT16(rs2, 1);\n+    int64_t d_h = (int64_t)dest;\n+    int64_t mul_00 = (int64_t)s1_h0 * (uint64_t)s2_h0;\n+    int64_t mul_11 = (int64_t)s1_h1 * (uint64_t)s2_h1;\n+    return (uint64_t)(d_h + mul_00 + mul_11);\n+}\n+\n+/**\n+ * PM2WADDAU.H - Add two widening products with accumulate\n+ * (unsigned, halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2waddau_h)(CPURISCVState *env, uint32_t rs1,\n+                             uint32_t rs2, uint64_t dest)\n+{\n+    uint16_t s1_h0 = (uint16_t)EXTRACT16(rs1, 0);\n+    uint16_t s1_h1 = (uint16_t)EXTRACT16(rs1, 1);\n+    uint16_t s2_h0 = (uint16_t)EXTRACT16(rs2, 0);\n+    uint16_t s2_h1 = (uint16_t)EXTRACT16(rs2, 1);\n+    uint64_t d_h = (uint64_t)dest;\n+    uint64_t mul_00 = (uint64_t)s1_h0 * (uint64_t)s2_h0;\n+    uint64_t mul_11 = (uint64_t)s1_h1 * (uint64_t)s2_h1;\n+    return (uint64_t)(d_h + mul_00 + mul_11);\n+}\n+\n+/**\n+ * PM2WADD.HX - Add two widening cross products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadd_hx)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+    int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+    int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+    return (uint64_t)(prod01 + prod10);\n+}\n+\n+/**\n+ * PM2WADDA.HX - Add two widening cross products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wadda_hx)(CPURISCVState *env, uint32_t rs1,\n+                             uint32_t rs2, uint64_t dest)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+    int64_t d = (int64_t)dest;\n+    int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+    int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+    return (uint64_t)(d + prod01 + prod10);\n+}\n+\n+/**\n+ * PM2WSUB.H - Subtract two widening products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsub_h)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+    int64_t prod0 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+    int64_t prod1 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+    return (uint64_t)(prod0 - prod1);\n+}\n+\n+/**\n+ * PM2WSUB.HX - Subtract two widening cross products (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsub_hx)(CPURISCVState *env, uint32_t rs1, uint32_t rs2)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+    int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+    int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+    return (uint64_t)(prod01 - prod10);\n+}\n+\n+/**\n+ * PM2WSUBA.H - Subtract two widening products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsuba_h)(CPURISCVState *env, uint32_t rs1,\n+                            uint32_t rs2, uint64_t dest)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+    int64_t d = (int64_t)dest;\n+    int64_t prod0 = (int64_t)s1_h0 * (int64_t)s2_h0;\n+    int64_t prod1 = (int64_t)s1_h1 * (int64_t)s2_h1;\n+    return (uint64_t)(d + prod0 - prod1);\n+}\n+\n+/**\n+ * PM2WSUBA.HX - Subtract two widening cross products with accumulate\n+ * (halfword to doubleword)\n+ */\n+uint64_t HELPER(pm2wsuba_hx)(CPURISCVState *env, uint32_t rs1,\n+                             uint32_t rs2, uint64_t dest)\n+{\n+    int16_t s1_h0 = (int16_t)EXTRACT16(rs1, 0);\n+    int16_t s1_h1 = (int16_t)EXTRACT16(rs1, 1);\n+    int16_t s2_h0 = (int16_t)EXTRACT16(rs2, 0);\n+    int16_t s2_h1 = (int16_t)EXTRACT16(rs2, 1);\n+    int64_t d = (int64_t)dest;\n+    int64_t prod01 = (int64_t)s1_h0 * (int64_t)s2_h1;\n+    int64_t prod10 = (int64_t)s1_h1 * (int64_t)s2_h0;\n+    return (uint64_t)(d + prod01 - prod10);\n+}\n",
    "prefixes": [
        "13/14"
    ]
}