From patchwork Mon Mar 14 07:38:00 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611085 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT3Cg3wgxz9sFk for ; Wed, 30 Mar 2022 21:51:58 +1100 (AEDT) Received: from localhost ([::1]:56138 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVvf-0003qr-3F for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:51:55 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44396) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVM-0004y3-0V; Wed, 30 Mar 2022 06:24:44 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37014) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVF-00074d-Hr; Wed, 30 Mar 2022 06:24:43 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id D6CFD11EF61; Wed, 30 Mar 2022 10:24:34 +0000 (UTC) From: ~eopxd Date: Mon, 14 Mar 2022 00:38:00 -0700 Subject: [PATCH qemu v7 01/14] target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed Message-ID: <164863587444.17401.9965527486691250478-1@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD No functional change intended in this commit. Signed-off-by: eop Chen Reviewed-by: Frank Chang Reviewed-by: Alistair Francis --- target/riscv/vector_helper.c | 1132 +++++++++++++++++----------------- 1 file changed, 565 insertions(+), 567 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 3bd4aac9c9..e94caf1a3c 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -710,7 +710,6 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - uint32_t esz, uint32_t dsz, opivv2_fn *fn) { uint32_t vm = vext_vm(desc); @@ -727,23 +726,23 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, } /* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ do_##NAME); \ } -GEN_VEXT_VV(vadd_vv_b, 1, 1) -GEN_VEXT_VV(vadd_vv_h, 2, 2) -GEN_VEXT_VV(vadd_vv_w, 4, 4) -GEN_VEXT_VV(vadd_vv_d, 8, 8) -GEN_VEXT_VV(vsub_vv_b, 1, 1) -GEN_VEXT_VV(vsub_vv_h, 2, 2) -GEN_VEXT_VV(vsub_vv_w, 4, 4) -GEN_VEXT_VV(vsub_vv_d, 8, 8) +GEN_VEXT_VV(vadd_vv_b) +GEN_VEXT_VV(vadd_vv_h) +GEN_VEXT_VV(vadd_vv_w) +GEN_VEXT_VV(vadd_vv_d) +GEN_VEXT_VV(vsub_vv_b) +GEN_VEXT_VV(vsub_vv_h) +GEN_VEXT_VV(vsub_vv_w) +GEN_VEXT_VV(vsub_vv_d) typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); @@ -773,7 +772,6 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - uint32_t esz, uint32_t dsz, opivx2_fn fn) { uint32_t vm = vext_vm(desc); @@ -790,27 +788,27 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, } /* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VX(NAME) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_vext_vx(vd, v0, s1, vs2, env, desc, \ do_##NAME); \ } -GEN_VEXT_VX(vadd_vx_b, 1, 1) -GEN_VEXT_VX(vadd_vx_h, 2, 2) -GEN_VEXT_VX(vadd_vx_w, 4, 4) -GEN_VEXT_VX(vadd_vx_d, 8, 8) -GEN_VEXT_VX(vsub_vx_b, 1, 1) -GEN_VEXT_VX(vsub_vx_h, 2, 2) -GEN_VEXT_VX(vsub_vx_w, 4, 4) -GEN_VEXT_VX(vsub_vx_d, 8, 8) -GEN_VEXT_VX(vrsub_vx_b, 1, 1) -GEN_VEXT_VX(vrsub_vx_h, 2, 2) -GEN_VEXT_VX(vrsub_vx_w, 4, 4) -GEN_VEXT_VX(vrsub_vx_d, 8, 8) +GEN_VEXT_VX(vadd_vx_b) +GEN_VEXT_VX(vadd_vx_h) +GEN_VEXT_VX(vadd_vx_w) +GEN_VEXT_VX(vadd_vx_d) +GEN_VEXT_VX(vsub_vx_b) +GEN_VEXT_VX(vsub_vx_h) +GEN_VEXT_VX(vsub_vx_w) +GEN_VEXT_VX(vsub_vx_d) +GEN_VEXT_VX(vrsub_vx_b) +GEN_VEXT_VX(vrsub_vx_h) +GEN_VEXT_VX(vrsub_vx_w) +GEN_VEXT_VX(vrsub_vx_d) void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) { @@ -889,30 +887,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) -GEN_VEXT_VV(vwaddu_vv_b, 1, 2) -GEN_VEXT_VV(vwaddu_vv_h, 2, 4) -GEN_VEXT_VV(vwaddu_vv_w, 4, 8) -GEN_VEXT_VV(vwsubu_vv_b, 1, 2) -GEN_VEXT_VV(vwsubu_vv_h, 2, 4) -GEN_VEXT_VV(vwsubu_vv_w, 4, 8) -GEN_VEXT_VV(vwadd_vv_b, 1, 2) -GEN_VEXT_VV(vwadd_vv_h, 2, 4) -GEN_VEXT_VV(vwadd_vv_w, 4, 8) -GEN_VEXT_VV(vwsub_vv_b, 1, 2) -GEN_VEXT_VV(vwsub_vv_h, 2, 4) -GEN_VEXT_VV(vwsub_vv_w, 4, 8) -GEN_VEXT_VV(vwaddu_wv_b, 1, 2) -GEN_VEXT_VV(vwaddu_wv_h, 2, 4) -GEN_VEXT_VV(vwaddu_wv_w, 4, 8) -GEN_VEXT_VV(vwsubu_wv_b, 1, 2) -GEN_VEXT_VV(vwsubu_wv_h, 2, 4) -GEN_VEXT_VV(vwsubu_wv_w, 4, 8) -GEN_VEXT_VV(vwadd_wv_b, 1, 2) -GEN_VEXT_VV(vwadd_wv_h, 2, 4) -GEN_VEXT_VV(vwadd_wv_w, 4, 8) -GEN_VEXT_VV(vwsub_wv_b, 1, 2) -GEN_VEXT_VV(vwsub_wv_h, 2, 4) -GEN_VEXT_VV(vwsub_wv_w, 4, 8) +GEN_VEXT_VV(vwaddu_vv_b) +GEN_VEXT_VV(vwaddu_vv_h) +GEN_VEXT_VV(vwaddu_vv_w) +GEN_VEXT_VV(vwsubu_vv_b) +GEN_VEXT_VV(vwsubu_vv_h) +GEN_VEXT_VV(vwsubu_vv_w) +GEN_VEXT_VV(vwadd_vv_b) +GEN_VEXT_VV(vwadd_vv_h) +GEN_VEXT_VV(vwadd_vv_w) +GEN_VEXT_VV(vwsub_vv_b) +GEN_VEXT_VV(vwsub_vv_h) +GEN_VEXT_VV(vwsub_vv_w) +GEN_VEXT_VV(vwaddu_wv_b) +GEN_VEXT_VV(vwaddu_wv_h) +GEN_VEXT_VV(vwaddu_wv_w) +GEN_VEXT_VV(vwsubu_wv_b) +GEN_VEXT_VV(vwsubu_wv_h) +GEN_VEXT_VV(vwsubu_wv_w) +GEN_VEXT_VV(vwadd_wv_b) +GEN_VEXT_VV(vwadd_wv_h) +GEN_VEXT_VV(vwadd_wv_w) +GEN_VEXT_VV(vwsub_wv_b) +GEN_VEXT_VV(vwsub_wv_h) +GEN_VEXT_VV(vwsub_wv_w) RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) @@ -938,30 +936,30 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) -GEN_VEXT_VX(vwaddu_vx_b, 1, 2) -GEN_VEXT_VX(vwaddu_vx_h, 2, 4) -GEN_VEXT_VX(vwaddu_vx_w, 4, 8) -GEN_VEXT_VX(vwsubu_vx_b, 1, 2) -GEN_VEXT_VX(vwsubu_vx_h, 2, 4) -GEN_VEXT_VX(vwsubu_vx_w, 4, 8) -GEN_VEXT_VX(vwadd_vx_b, 1, 2) -GEN_VEXT_VX(vwadd_vx_h, 2, 4) -GEN_VEXT_VX(vwadd_vx_w, 4, 8) -GEN_VEXT_VX(vwsub_vx_b, 1, 2) -GEN_VEXT_VX(vwsub_vx_h, 2, 4) -GEN_VEXT_VX(vwsub_vx_w, 4, 8) -GEN_VEXT_VX(vwaddu_wx_b, 1, 2) -GEN_VEXT_VX(vwaddu_wx_h, 2, 4) -GEN_VEXT_VX(vwaddu_wx_w, 4, 8) -GEN_VEXT_VX(vwsubu_wx_b, 1, 2) -GEN_VEXT_VX(vwsubu_wx_h, 2, 4) -GEN_VEXT_VX(vwsubu_wx_w, 4, 8) -GEN_VEXT_VX(vwadd_wx_b, 1, 2) -GEN_VEXT_VX(vwadd_wx_h, 2, 4) -GEN_VEXT_VX(vwadd_wx_w, 4, 8) -GEN_VEXT_VX(vwsub_wx_b, 1, 2) -GEN_VEXT_VX(vwsub_wx_h, 2, 4) -GEN_VEXT_VX(vwsub_wx_w, 4, 8) +GEN_VEXT_VX(vwaddu_vx_b) +GEN_VEXT_VX(vwaddu_vx_h) +GEN_VEXT_VX(vwaddu_vx_w) +GEN_VEXT_VX(vwsubu_vx_b) +GEN_VEXT_VX(vwsubu_vx_h) +GEN_VEXT_VX(vwsubu_vx_w) +GEN_VEXT_VX(vwadd_vx_b) +GEN_VEXT_VX(vwadd_vx_h) +GEN_VEXT_VX(vwadd_vx_w) +GEN_VEXT_VX(vwsub_vx_b) +GEN_VEXT_VX(vwsub_vx_h) +GEN_VEXT_VX(vwsub_vx_w) +GEN_VEXT_VX(vwaddu_wx_b) +GEN_VEXT_VX(vwaddu_wx_h) +GEN_VEXT_VX(vwaddu_wx_w) +GEN_VEXT_VX(vwsubu_wx_b) +GEN_VEXT_VX(vwsubu_wx_h) +GEN_VEXT_VX(vwsubu_wx_w) +GEN_VEXT_VX(vwadd_wx_b) +GEN_VEXT_VX(vwadd_wx_h) +GEN_VEXT_VX(vwadd_wx_w) +GEN_VEXT_VX(vwsub_wx_b) +GEN_VEXT_VX(vwsub_wx_h) +GEN_VEXT_VX(vwsub_wx_w) /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ #define DO_VADC(N, M, C) (N + M + C) @@ -1091,18 +1089,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) -GEN_VEXT_VV(vand_vv_b, 1, 1) -GEN_VEXT_VV(vand_vv_h, 2, 2) -GEN_VEXT_VV(vand_vv_w, 4, 4) -GEN_VEXT_VV(vand_vv_d, 8, 8) -GEN_VEXT_VV(vor_vv_b, 1, 1) -GEN_VEXT_VV(vor_vv_h, 2, 2) -GEN_VEXT_VV(vor_vv_w, 4, 4) -GEN_VEXT_VV(vor_vv_d, 8, 8) -GEN_VEXT_VV(vxor_vv_b, 1, 1) -GEN_VEXT_VV(vxor_vv_h, 2, 2) -GEN_VEXT_VV(vxor_vv_w, 4, 4) -GEN_VEXT_VV(vxor_vv_d, 8, 8) +GEN_VEXT_VV(vand_vv_b) +GEN_VEXT_VV(vand_vv_h) +GEN_VEXT_VV(vand_vv_w) +GEN_VEXT_VV(vand_vv_d) +GEN_VEXT_VV(vor_vv_b) +GEN_VEXT_VV(vor_vv_h) +GEN_VEXT_VV(vor_vv_w) +GEN_VEXT_VV(vor_vv_d) +GEN_VEXT_VV(vxor_vv_b) +GEN_VEXT_VV(vxor_vv_h) +GEN_VEXT_VV(vxor_vv_w) +GEN_VEXT_VV(vxor_vv_d) RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) @@ -1116,18 +1114,18 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) -GEN_VEXT_VX(vand_vx_b, 1, 1) -GEN_VEXT_VX(vand_vx_h, 2, 2) -GEN_VEXT_VX(vand_vx_w, 4, 4) -GEN_VEXT_VX(vand_vx_d, 8, 8) -GEN_VEXT_VX(vor_vx_b, 1, 1) -GEN_VEXT_VX(vor_vx_h, 2, 2) -GEN_VEXT_VX(vor_vx_w, 4, 4) -GEN_VEXT_VX(vor_vx_d, 8, 8) -GEN_VEXT_VX(vxor_vx_b, 1, 1) -GEN_VEXT_VX(vxor_vx_h, 2, 2) -GEN_VEXT_VX(vxor_vx_w, 4, 4) -GEN_VEXT_VX(vxor_vx_d, 8, 8) +GEN_VEXT_VX(vand_vx_b) +GEN_VEXT_VX(vand_vx_h) +GEN_VEXT_VX(vand_vx_w) +GEN_VEXT_VX(vand_vx_d) +GEN_VEXT_VX(vor_vx_b) +GEN_VEXT_VX(vor_vx_h) +GEN_VEXT_VX(vor_vx_w) +GEN_VEXT_VX(vor_vx_d) +GEN_VEXT_VX(vxor_vx_b) +GEN_VEXT_VX(vxor_vx_h) +GEN_VEXT_VX(vxor_vx_w) +GEN_VEXT_VX(vxor_vx_d) /* Vector Single-Width Bit Shift Instructions */ #define DO_SLL(N, M) (N << (M)) @@ -1348,22 +1346,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) -GEN_VEXT_VV(vminu_vv_b, 1, 1) -GEN_VEXT_VV(vminu_vv_h, 2, 2) -GEN_VEXT_VV(vminu_vv_w, 4, 4) -GEN_VEXT_VV(vminu_vv_d, 8, 8) -GEN_VEXT_VV(vmin_vv_b, 1, 1) -GEN_VEXT_VV(vmin_vv_h, 2, 2) -GEN_VEXT_VV(vmin_vv_w, 4, 4) -GEN_VEXT_VV(vmin_vv_d, 8, 8) -GEN_VEXT_VV(vmaxu_vv_b, 1, 1) -GEN_VEXT_VV(vmaxu_vv_h, 2, 2) -GEN_VEXT_VV(vmaxu_vv_w, 4, 4) -GEN_VEXT_VV(vmaxu_vv_d, 8, 8) -GEN_VEXT_VV(vmax_vv_b, 1, 1) -GEN_VEXT_VV(vmax_vv_h, 2, 2) -GEN_VEXT_VV(vmax_vv_w, 4, 4) -GEN_VEXT_VV(vmax_vv_d, 8, 8) +GEN_VEXT_VV(vminu_vv_b) +GEN_VEXT_VV(vminu_vv_h) +GEN_VEXT_VV(vminu_vv_w) +GEN_VEXT_VV(vminu_vv_d) +GEN_VEXT_VV(vmin_vv_b) +GEN_VEXT_VV(vmin_vv_h) +GEN_VEXT_VV(vmin_vv_w) +GEN_VEXT_VV(vmin_vv_d) +GEN_VEXT_VV(vmaxu_vv_b) +GEN_VEXT_VV(vmaxu_vv_h) +GEN_VEXT_VV(vmaxu_vv_w) +GEN_VEXT_VV(vmaxu_vv_d) +GEN_VEXT_VV(vmax_vv_b) +GEN_VEXT_VV(vmax_vv_h) +GEN_VEXT_VV(vmax_vv_w) +GEN_VEXT_VV(vmax_vv_d) RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) @@ -1381,22 +1379,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) -GEN_VEXT_VX(vminu_vx_b, 1, 1) -GEN_VEXT_VX(vminu_vx_h, 2, 2) -GEN_VEXT_VX(vminu_vx_w, 4, 4) -GEN_VEXT_VX(vminu_vx_d, 8, 8) -GEN_VEXT_VX(vmin_vx_b, 1, 1) -GEN_VEXT_VX(vmin_vx_h, 2, 2) -GEN_VEXT_VX(vmin_vx_w, 4, 4) -GEN_VEXT_VX(vmin_vx_d, 8, 8) -GEN_VEXT_VX(vmaxu_vx_b, 1, 1) -GEN_VEXT_VX(vmaxu_vx_h, 2, 2) -GEN_VEXT_VX(vmaxu_vx_w, 4, 4) -GEN_VEXT_VX(vmaxu_vx_d, 8, 8) -GEN_VEXT_VX(vmax_vx_b, 1, 1) -GEN_VEXT_VX(vmax_vx_h, 2, 2) -GEN_VEXT_VX(vmax_vx_w, 4, 4) -GEN_VEXT_VX(vmax_vx_d, 8, 8) +GEN_VEXT_VX(vminu_vx_b) +GEN_VEXT_VX(vminu_vx_h) +GEN_VEXT_VX(vminu_vx_w) +GEN_VEXT_VX(vminu_vx_d) +GEN_VEXT_VX(vmin_vx_b) +GEN_VEXT_VX(vmin_vx_h) +GEN_VEXT_VX(vmin_vx_w) +GEN_VEXT_VX(vmin_vx_d) +GEN_VEXT_VX(vmaxu_vx_b) +GEN_VEXT_VX(vmaxu_vx_h) +GEN_VEXT_VX(vmaxu_vx_w) +GEN_VEXT_VX(vmaxu_vx_d) +GEN_VEXT_VX(vmax_vx_b) +GEN_VEXT_VX(vmax_vx_h) +GEN_VEXT_VX(vmax_vx_w) +GEN_VEXT_VX(vmax_vx_d) /* Vector Single-Width Integer Multiply Instructions */ #define DO_MUL(N, M) (N * M) @@ -1404,10 +1402,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) -GEN_VEXT_VV(vmul_vv_b, 1, 1) -GEN_VEXT_VV(vmul_vv_h, 2, 2) -GEN_VEXT_VV(vmul_vv_w, 4, 4) -GEN_VEXT_VV(vmul_vv_d, 8, 8) +GEN_VEXT_VV(vmul_vv_b) +GEN_VEXT_VV(vmul_vv_h) +GEN_VEXT_VV(vmul_vv_w) +GEN_VEXT_VV(vmul_vv_d) static int8_t do_mulh_b(int8_t s2, int8_t s1) { @@ -1511,18 +1509,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) -GEN_VEXT_VV(vmulh_vv_b, 1, 1) -GEN_VEXT_VV(vmulh_vv_h, 2, 2) -GEN_VEXT_VV(vmulh_vv_w, 4, 4) -GEN_VEXT_VV(vmulh_vv_d, 8, 8) -GEN_VEXT_VV(vmulhu_vv_b, 1, 1) -GEN_VEXT_VV(vmulhu_vv_h, 2, 2) -GEN_VEXT_VV(vmulhu_vv_w, 4, 4) -GEN_VEXT_VV(vmulhu_vv_d, 8, 8) -GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) -GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) -GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) -GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) +GEN_VEXT_VV(vmulh_vv_b) +GEN_VEXT_VV(vmulh_vv_h) +GEN_VEXT_VV(vmulh_vv_w) +GEN_VEXT_VV(vmulh_vv_d) +GEN_VEXT_VV(vmulhu_vv_b) +GEN_VEXT_VV(vmulhu_vv_h) +GEN_VEXT_VV(vmulhu_vv_w) +GEN_VEXT_VV(vmulhu_vv_d) +GEN_VEXT_VV(vmulhsu_vv_b) +GEN_VEXT_VV(vmulhsu_vv_h) +GEN_VEXT_VV(vmulhsu_vv_w) +GEN_VEXT_VV(vmulhsu_vv_d) RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) @@ -1540,22 +1538,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) -GEN_VEXT_VX(vmul_vx_b, 1, 1) -GEN_VEXT_VX(vmul_vx_h, 2, 2) -GEN_VEXT_VX(vmul_vx_w, 4, 4) -GEN_VEXT_VX(vmul_vx_d, 8, 8) -GEN_VEXT_VX(vmulh_vx_b, 1, 1) -GEN_VEXT_VX(vmulh_vx_h, 2, 2) -GEN_VEXT_VX(vmulh_vx_w, 4, 4) -GEN_VEXT_VX(vmulh_vx_d, 8, 8) -GEN_VEXT_VX(vmulhu_vx_b, 1, 1) -GEN_VEXT_VX(vmulhu_vx_h, 2, 2) -GEN_VEXT_VX(vmulhu_vx_w, 4, 4) -GEN_VEXT_VX(vmulhu_vx_d, 8, 8) -GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) -GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) -GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) -GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) +GEN_VEXT_VX(vmul_vx_b) +GEN_VEXT_VX(vmul_vx_h) +GEN_VEXT_VX(vmul_vx_w) +GEN_VEXT_VX(vmul_vx_d) +GEN_VEXT_VX(vmulh_vx_b) +GEN_VEXT_VX(vmulh_vx_h) +GEN_VEXT_VX(vmulh_vx_w) +GEN_VEXT_VX(vmulh_vx_d) +GEN_VEXT_VX(vmulhu_vx_b) +GEN_VEXT_VX(vmulhu_vx_h) +GEN_VEXT_VX(vmulhu_vx_w) +GEN_VEXT_VX(vmulhu_vx_d) +GEN_VEXT_VX(vmulhsu_vx_b) +GEN_VEXT_VX(vmulhsu_vx_h) +GEN_VEXT_VX(vmulhsu_vx_w) +GEN_VEXT_VX(vmulhsu_vx_d) /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) @@ -1581,22 +1579,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) -GEN_VEXT_VV(vdivu_vv_b, 1, 1) -GEN_VEXT_VV(vdivu_vv_h, 2, 2) -GEN_VEXT_VV(vdivu_vv_w, 4, 4) -GEN_VEXT_VV(vdivu_vv_d, 8, 8) -GEN_VEXT_VV(vdiv_vv_b, 1, 1) -GEN_VEXT_VV(vdiv_vv_h, 2, 2) -GEN_VEXT_VV(vdiv_vv_w, 4, 4) -GEN_VEXT_VV(vdiv_vv_d, 8, 8) -GEN_VEXT_VV(vremu_vv_b, 1, 1) -GEN_VEXT_VV(vremu_vv_h, 2, 2) -GEN_VEXT_VV(vremu_vv_w, 4, 4) -GEN_VEXT_VV(vremu_vv_d, 8, 8) -GEN_VEXT_VV(vrem_vv_b, 1, 1) -GEN_VEXT_VV(vrem_vv_h, 2, 2) -GEN_VEXT_VV(vrem_vv_w, 4, 4) -GEN_VEXT_VV(vrem_vv_d, 8, 8) +GEN_VEXT_VV(vdivu_vv_b) +GEN_VEXT_VV(vdivu_vv_h) +GEN_VEXT_VV(vdivu_vv_w) +GEN_VEXT_VV(vdivu_vv_d) +GEN_VEXT_VV(vdiv_vv_b) +GEN_VEXT_VV(vdiv_vv_h) +GEN_VEXT_VV(vdiv_vv_w) +GEN_VEXT_VV(vdiv_vv_d) +GEN_VEXT_VV(vremu_vv_b) +GEN_VEXT_VV(vremu_vv_h) +GEN_VEXT_VV(vremu_vv_w) +GEN_VEXT_VV(vremu_vv_d) +GEN_VEXT_VV(vrem_vv_b) +GEN_VEXT_VV(vrem_vv_h) +GEN_VEXT_VV(vrem_vv_w) +GEN_VEXT_VV(vrem_vv_d) RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) @@ -1614,22 +1612,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) -GEN_VEXT_VX(vdivu_vx_b, 1, 1) -GEN_VEXT_VX(vdivu_vx_h, 2, 2) -GEN_VEXT_VX(vdivu_vx_w, 4, 4) -GEN_VEXT_VX(vdivu_vx_d, 8, 8) -GEN_VEXT_VX(vdiv_vx_b, 1, 1) -GEN_VEXT_VX(vdiv_vx_h, 2, 2) -GEN_VEXT_VX(vdiv_vx_w, 4, 4) -GEN_VEXT_VX(vdiv_vx_d, 8, 8) -GEN_VEXT_VX(vremu_vx_b, 1, 1) -GEN_VEXT_VX(vremu_vx_h, 2, 2) -GEN_VEXT_VX(vremu_vx_w, 4, 4) -GEN_VEXT_VX(vremu_vx_d, 8, 8) -GEN_VEXT_VX(vrem_vx_b, 1, 1) -GEN_VEXT_VX(vrem_vx_h, 2, 2) -GEN_VEXT_VX(vrem_vx_w, 4, 4) -GEN_VEXT_VX(vrem_vx_d, 8, 8) +GEN_VEXT_VX(vdivu_vx_b) +GEN_VEXT_VX(vdivu_vx_h) +GEN_VEXT_VX(vdivu_vx_w) +GEN_VEXT_VX(vdivu_vx_d) +GEN_VEXT_VX(vdiv_vx_b) +GEN_VEXT_VX(vdiv_vx_h) +GEN_VEXT_VX(vdiv_vx_w) +GEN_VEXT_VX(vdiv_vx_d) +GEN_VEXT_VX(vremu_vx_b) +GEN_VEXT_VX(vremu_vx_h) +GEN_VEXT_VX(vremu_vx_w) +GEN_VEXT_VX(vremu_vx_d) +GEN_VEXT_VX(vrem_vx_b) +GEN_VEXT_VX(vrem_vx_h) +GEN_VEXT_VX(vrem_vx_w) +GEN_VEXT_VX(vrem_vx_d) /* Vector Widening Integer Multiply Instructions */ RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) @@ -1641,15 +1639,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) -GEN_VEXT_VV(vwmul_vv_b, 1, 2) -GEN_VEXT_VV(vwmul_vv_h, 2, 4) -GEN_VEXT_VV(vwmul_vv_w, 4, 8) -GEN_VEXT_VV(vwmulu_vv_b, 1, 2) -GEN_VEXT_VV(vwmulu_vv_h, 2, 4) -GEN_VEXT_VV(vwmulu_vv_w, 4, 8) -GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) -GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) -GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) +GEN_VEXT_VV(vwmul_vv_b) +GEN_VEXT_VV(vwmul_vv_h) +GEN_VEXT_VV(vwmul_vv_w) +GEN_VEXT_VV(vwmulu_vv_b) +GEN_VEXT_VV(vwmulu_vv_h) +GEN_VEXT_VV(vwmulu_vv_w) +GEN_VEXT_VV(vwmulsu_vv_b) +GEN_VEXT_VV(vwmulsu_vv_h) +GEN_VEXT_VV(vwmulsu_vv_w) RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) @@ -1660,15 +1658,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) -GEN_VEXT_VX(vwmul_vx_b, 1, 2) -GEN_VEXT_VX(vwmul_vx_h, 2, 4) -GEN_VEXT_VX(vwmul_vx_w, 4, 8) -GEN_VEXT_VX(vwmulu_vx_b, 1, 2) -GEN_VEXT_VX(vwmulu_vx_h, 2, 4) -GEN_VEXT_VX(vwmulu_vx_w, 4, 8) -GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) -GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) -GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) +GEN_VEXT_VX(vwmul_vx_b) +GEN_VEXT_VX(vwmul_vx_h) +GEN_VEXT_VX(vwmul_vx_w) +GEN_VEXT_VX(vwmulu_vx_b) +GEN_VEXT_VX(vwmulu_vx_h) +GEN_VEXT_VX(vwmulu_vx_w) +GEN_VEXT_VX(vwmulsu_vx_b) +GEN_VEXT_VX(vwmulsu_vx_h) +GEN_VEXT_VX(vwmulsu_vx_w) /* Vector Single-Width Integer Multiply-Add Instructions */ #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -1700,22 +1698,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) -GEN_VEXT_VV(vmacc_vv_b, 1, 1) -GEN_VEXT_VV(vmacc_vv_h, 2, 2) -GEN_VEXT_VV(vmacc_vv_w, 4, 4) -GEN_VEXT_VV(vmacc_vv_d, 8, 8) -GEN_VEXT_VV(vnmsac_vv_b, 1, 1) -GEN_VEXT_VV(vnmsac_vv_h, 2, 2) -GEN_VEXT_VV(vnmsac_vv_w, 4, 4) -GEN_VEXT_VV(vnmsac_vv_d, 8, 8) -GEN_VEXT_VV(vmadd_vv_b, 1, 1) -GEN_VEXT_VV(vmadd_vv_h, 2, 2) -GEN_VEXT_VV(vmadd_vv_w, 4, 4) -GEN_VEXT_VV(vmadd_vv_d, 8, 8) -GEN_VEXT_VV(vnmsub_vv_b, 1, 1) -GEN_VEXT_VV(vnmsub_vv_h, 2, 2) -GEN_VEXT_VV(vnmsub_vv_w, 4, 4) -GEN_VEXT_VV(vnmsub_vv_d, 8, 8) +GEN_VEXT_VV(vmacc_vv_b) +GEN_VEXT_VV(vmacc_vv_h) +GEN_VEXT_VV(vmacc_vv_w) +GEN_VEXT_VV(vmacc_vv_d) +GEN_VEXT_VV(vnmsac_vv_b) +GEN_VEXT_VV(vnmsac_vv_h) +GEN_VEXT_VV(vnmsac_vv_w) +GEN_VEXT_VV(vnmsac_vv_d) +GEN_VEXT_VV(vmadd_vv_b) +GEN_VEXT_VV(vmadd_vv_h) +GEN_VEXT_VV(vmadd_vv_w) +GEN_VEXT_VV(vmadd_vv_d) +GEN_VEXT_VV(vnmsub_vv_b) +GEN_VEXT_VV(vnmsub_vv_h) +GEN_VEXT_VV(vnmsub_vv_w) +GEN_VEXT_VV(vnmsub_vv_d) #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ @@ -1741,22 +1739,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) -GEN_VEXT_VX(vmacc_vx_b, 1, 1) -GEN_VEXT_VX(vmacc_vx_h, 2, 2) -GEN_VEXT_VX(vmacc_vx_w, 4, 4) -GEN_VEXT_VX(vmacc_vx_d, 8, 8) -GEN_VEXT_VX(vnmsac_vx_b, 1, 1) -GEN_VEXT_VX(vnmsac_vx_h, 2, 2) -GEN_VEXT_VX(vnmsac_vx_w, 4, 4) -GEN_VEXT_VX(vnmsac_vx_d, 8, 8) -GEN_VEXT_VX(vmadd_vx_b, 1, 1) -GEN_VEXT_VX(vmadd_vx_h, 2, 2) -GEN_VEXT_VX(vmadd_vx_w, 4, 4) -GEN_VEXT_VX(vmadd_vx_d, 8, 8) -GEN_VEXT_VX(vnmsub_vx_b, 1, 1) -GEN_VEXT_VX(vnmsub_vx_h, 2, 2) -GEN_VEXT_VX(vnmsub_vx_w, 4, 4) -GEN_VEXT_VX(vnmsub_vx_d, 8, 8) +GEN_VEXT_VX(vmacc_vx_b) +GEN_VEXT_VX(vmacc_vx_h) +GEN_VEXT_VX(vmacc_vx_w) +GEN_VEXT_VX(vmacc_vx_d) +GEN_VEXT_VX(vnmsac_vx_b) +GEN_VEXT_VX(vnmsac_vx_h) +GEN_VEXT_VX(vnmsac_vx_w) +GEN_VEXT_VX(vnmsac_vx_d) +GEN_VEXT_VX(vmadd_vx_b) +GEN_VEXT_VX(vmadd_vx_h) +GEN_VEXT_VX(vmadd_vx_w) +GEN_VEXT_VX(vmadd_vx_d) +GEN_VEXT_VX(vnmsub_vx_b) +GEN_VEXT_VX(vnmsub_vx_h) +GEN_VEXT_VX(vnmsub_vx_w) +GEN_VEXT_VX(vnmsub_vx_d) /* Vector Widening Integer Multiply-Add Instructions */ RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) @@ -1768,15 +1766,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) -GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) -GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) -GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) -GEN_VEXT_VV(vwmacc_vv_b, 1, 2) -GEN_VEXT_VV(vwmacc_vv_h, 2, 4) -GEN_VEXT_VV(vwmacc_vv_w, 4, 8) -GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) -GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) -GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) +GEN_VEXT_VV(vwmaccu_vv_b) +GEN_VEXT_VV(vwmaccu_vv_h) +GEN_VEXT_VV(vwmaccu_vv_w) +GEN_VEXT_VV(vwmacc_vv_b) +GEN_VEXT_VV(vwmacc_vv_h) +GEN_VEXT_VV(vwmacc_vv_w) +GEN_VEXT_VV(vwmaccsu_vv_b) +GEN_VEXT_VV(vwmaccsu_vv_h) +GEN_VEXT_VV(vwmaccsu_vv_w) RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) @@ -1790,18 +1788,18 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) -GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) -GEN_VEXT_VX(vwmacc_vx_b, 1, 2) -GEN_VEXT_VX(vwmacc_vx_h, 2, 4) -GEN_VEXT_VX(vwmacc_vx_w, 4, 8) -GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) -GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) +GEN_VEXT_VX(vwmaccu_vx_b) +GEN_VEXT_VX(vwmaccu_vx_h) +GEN_VEXT_VX(vwmaccu_vx_w) +GEN_VEXT_VX(vwmacc_vx_b) +GEN_VEXT_VX(vwmacc_vx_h) +GEN_VEXT_VX(vwmacc_vx_w) +GEN_VEXT_VX(vwmaccsu_vx_b) +GEN_VEXT_VX(vwmaccsu_vx_h) +GEN_VEXT_VX(vwmaccsu_vx_w) +GEN_VEXT_VX(vwmaccus_vx_b) +GEN_VEXT_VX(vwmaccus_vx_h) +GEN_VEXT_VX(vwmaccus_vx_w) /* Vector Integer Merge and Move Instructions */ #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ @@ -1922,7 +1920,7 @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, static inline void vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, - uint32_t desc, uint32_t esz, uint32_t dsz, + uint32_t desc, opivv2_rm_fn *fn) { uint32_t vm = vext_vm(desc); @@ -1949,11 +1947,11 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, } /* generate helpers for fixed point instructions with OPIVV format */ -#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV_RM(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ do_##NAME); \ } @@ -2004,10 +2002,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) -GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsaddu_vv_b) +GEN_VEXT_VV_RM(vsaddu_vv_h) +GEN_VEXT_VV_RM(vsaddu_vv_w) +GEN_VEXT_VV_RM(vsaddu_vv_d) typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, CPURISCVState *env, int vxrm); @@ -2039,7 +2037,7 @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, static inline void vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, - uint32_t desc, uint32_t esz, uint32_t dsz, + uint32_t desc, opivx2_rm_fn *fn) { uint32_t vm = vext_vm(desc); @@ -2066,11 +2064,11 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, } /* generate helpers for fixed point instructions with OPIVX format */ -#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VX_RM(NAME) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ - vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ do_##NAME); \ } @@ -2078,10 +2076,10 @@ RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) -GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsaddu_vx_b) +GEN_VEXT_VX_RM(vsaddu_vx_h) +GEN_VEXT_VX_RM(vsaddu_vx_w) +GEN_VEXT_VX_RM(vsaddu_vx_d) static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -2127,19 +2125,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) -GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsadd_vv_b) +GEN_VEXT_VV_RM(vsadd_vv_h) +GEN_VEXT_VV_RM(vsadd_vv_w) +GEN_VEXT_VV_RM(vsadd_vv_d) RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) -GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsadd_vx_b) +GEN_VEXT_VX_RM(vsadd_vx_h) +GEN_VEXT_VX_RM(vsadd_vx_w) +GEN_VEXT_VX_RM(vsadd_vx_d) static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) { @@ -2188,19 +2186,19 @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) -GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssubu_vv_b) +GEN_VEXT_VV_RM(vssubu_vv_h) +GEN_VEXT_VV_RM(vssubu_vv_w) +GEN_VEXT_VV_RM(vssubu_vv_d) RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) -GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssubu_vx_b) +GEN_VEXT_VX_RM(vssubu_vx_h) +GEN_VEXT_VX_RM(vssubu_vx_w) +GEN_VEXT_VX_RM(vssubu_vx_d) static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -2246,19 +2244,19 @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) -GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssub_vv_b) +GEN_VEXT_VV_RM(vssub_vv_h) +GEN_VEXT_VV_RM(vssub_vv_w) +GEN_VEXT_VV_RM(vssub_vv_d) RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) -GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssub_vx_b) +GEN_VEXT_VX_RM(vssub_vx_h) +GEN_VEXT_VX_RM(vssub_vx_w) +GEN_VEXT_VX_RM(vssub_vx_d) /* Vector Single-Width Averaging Add and Subtract */ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) @@ -2310,19 +2308,19 @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) -GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) -GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) -GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) -GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) +GEN_VEXT_VV_RM(vaadd_vv_b) +GEN_VEXT_VV_RM(vaadd_vv_h) +GEN_VEXT_VV_RM(vaadd_vv_w) +GEN_VEXT_VV_RM(vaadd_vv_d) RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) -GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) -GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) -GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) -GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) +GEN_VEXT_VX_RM(vaadd_vx_b) +GEN_VEXT_VX_RM(vaadd_vx_h) +GEN_VEXT_VX_RM(vaadd_vx_w) +GEN_VEXT_VX_RM(vaadd_vx_d) static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2347,19 +2345,19 @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) -GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vaaddu_vv_b) +GEN_VEXT_VV_RM(vaaddu_vv_h) +GEN_VEXT_VV_RM(vaaddu_vv_w) +GEN_VEXT_VV_RM(vaaddu_vv_d) RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) -GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vaaddu_vx_b) +GEN_VEXT_VX_RM(vaaddu_vx_h) +GEN_VEXT_VX_RM(vaaddu_vx_w) +GEN_VEXT_VX_RM(vaaddu_vx_d) static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) { @@ -2383,19 +2381,19 @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) -GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) -GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) -GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) -GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) +GEN_VEXT_VV_RM(vasub_vv_b) +GEN_VEXT_VV_RM(vasub_vv_h) +GEN_VEXT_VV_RM(vasub_vv_w) +GEN_VEXT_VV_RM(vasub_vv_d) RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) -GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) -GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) -GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) -GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) +GEN_VEXT_VX_RM(vasub_vx_b) +GEN_VEXT_VX_RM(vasub_vx_h) +GEN_VEXT_VX_RM(vasub_vx_w) +GEN_VEXT_VX_RM(vasub_vx_d) static inline uint32_t asubu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2420,19 +2418,19 @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) -GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vasubu_vv_b) +GEN_VEXT_VV_RM(vasubu_vv_h) +GEN_VEXT_VV_RM(vasubu_vv_w) +GEN_VEXT_VV_RM(vasubu_vv_d) RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) -GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vasubu_vx_b) +GEN_VEXT_VX_RM(vasubu_vx_h) +GEN_VEXT_VX_RM(vasubu_vx_w) +GEN_VEXT_VX_RM(vasubu_vx_d) /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -2527,19 +2525,19 @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) -GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsmul_vv_b) +GEN_VEXT_VV_RM(vsmul_vv_h) +GEN_VEXT_VV_RM(vsmul_vv_w) +GEN_VEXT_VV_RM(vsmul_vv_d) RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) -GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsmul_vx_b) +GEN_VEXT_VX_RM(vsmul_vx_h) +GEN_VEXT_VX_RM(vsmul_vx_w) +GEN_VEXT_VX_RM(vsmul_vx_d) /* Vector Single-Width Scaling Shift Instructions */ static inline uint8_t @@ -2586,19 +2584,19 @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) -GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssrl_vv_b) +GEN_VEXT_VV_RM(vssrl_vv_h) +GEN_VEXT_VV_RM(vssrl_vv_w) +GEN_VEXT_VV_RM(vssrl_vv_d) RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) -GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssrl_vx_b) +GEN_VEXT_VX_RM(vssrl_vx_h) +GEN_VEXT_VX_RM(vssrl_vx_w) +GEN_VEXT_VX_RM(vssrl_vx_d) static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -2645,19 +2643,19 @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) -GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssra_vv_b) +GEN_VEXT_VV_RM(vssra_vv_h) +GEN_VEXT_VV_RM(vssra_vv_w) +GEN_VEXT_VV_RM(vssra_vv_d) RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) -GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssra_vx_b) +GEN_VEXT_VX_RM(vssra_vx_h) +GEN_VEXT_VX_RM(vssra_vx_w) +GEN_VEXT_VX_RM(vssra_vx_d) /* Vector Narrowing Fixed-Point Clip Instructions */ static inline int8_t @@ -2720,16 +2718,16 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) -GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) -GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) -GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) +GEN_VEXT_VV_RM(vnclip_wv_b) +GEN_VEXT_VV_RM(vnclip_wv_h) +GEN_VEXT_VV_RM(vnclip_wv_w) RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) -GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1) -GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2) -GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4) +GEN_VEXT_VX_RM(vnclip_wx_b) +GEN_VEXT_VX_RM(vnclip_wx_h) +GEN_VEXT_VX_RM(vnclip_wx_w) static inline uint8_t vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) @@ -2782,16 +2780,16 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) -GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1) -GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2) -GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4) +GEN_VEXT_VV_RM(vnclipu_wv_b) +GEN_VEXT_VV_RM(vnclipu_wv_h) +GEN_VEXT_VV_RM(vnclipu_wv_w) RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) -GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1) -GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2) -GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4) +GEN_VEXT_VX_RM(vnclipu_wx_b) +GEN_VEXT_VX_RM(vnclipu_wx_h) +GEN_VEXT_VX_RM(vnclipu_wx_w) /* *** Vector Float Point Arithmetic Instructions @@ -2806,7 +2804,7 @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ } -#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV_ENV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ @@ -2827,9 +2825,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) -GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfadd_vv_h) +GEN_VEXT_VV_ENV(vfadd_vv_w) +GEN_VEXT_VV_ENV(vfadd_vv_d) #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -2839,7 +2837,7 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ } -#define GEN_VEXT_VF(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VF(NAME) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ @@ -2860,22 +2858,22 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) -GEN_VEXT_VF(vfadd_vf_h, 2, 2) -GEN_VEXT_VF(vfadd_vf_w, 4, 4) -GEN_VEXT_VF(vfadd_vf_d, 8, 8) +GEN_VEXT_VF(vfadd_vf_h) +GEN_VEXT_VF(vfadd_vf_w) +GEN_VEXT_VF(vfadd_vf_d) RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) -GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsub_vv_h) +GEN_VEXT_VV_ENV(vfsub_vv_w) +GEN_VEXT_VV_ENV(vfsub_vv_d) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) -GEN_VEXT_VF(vfsub_vf_h, 2, 2) -GEN_VEXT_VF(vfsub_vf_w, 4, 4) -GEN_VEXT_VF(vfsub_vf_d, 8, 8) +GEN_VEXT_VF(vfsub_vf_h) +GEN_VEXT_VF(vfsub_vf_w) +GEN_VEXT_VF(vfsub_vf_d) static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { @@ -2895,9 +2893,9 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) -GEN_VEXT_VF(vfrsub_vf_h, 2, 2) -GEN_VEXT_VF(vfrsub_vf_w, 4, 4) -GEN_VEXT_VF(vfrsub_vf_d, 8, 8) +GEN_VEXT_VF(vfrsub_vf_h) +GEN_VEXT_VF(vfrsub_vf_w) +GEN_VEXT_VF(vfrsub_vf_d) /* Vector Widening Floating-Point Add/Subtract Instructions */ static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) @@ -2915,12 +2913,12 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) -GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwadd_vv_h) +GEN_VEXT_VV_ENV(vfwadd_vv_w) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) -GEN_VEXT_VF(vfwadd_vf_h, 2, 4) -GEN_VEXT_VF(vfwadd_vf_w, 4, 8) +GEN_VEXT_VF(vfwadd_vf_h) +GEN_VEXT_VF(vfwadd_vf_w) static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { @@ -2937,12 +2935,12 @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) -GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwsub_vv_h) +GEN_VEXT_VV_ENV(vfwsub_vv_w) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) -GEN_VEXT_VF(vfwsub_vf_h, 2, 4) -GEN_VEXT_VF(vfwsub_vf_w, 4, 8) +GEN_VEXT_VF(vfwsub_vf_h) +GEN_VEXT_VF(vfwsub_vf_w) static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { @@ -2956,12 +2954,12 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) -GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwadd_wv_h) +GEN_VEXT_VV_ENV(vfwadd_wv_w) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) -GEN_VEXT_VF(vfwadd_wf_h, 2, 4) -GEN_VEXT_VF(vfwadd_wf_w, 4, 8) +GEN_VEXT_VF(vfwadd_wf_h) +GEN_VEXT_VF(vfwadd_wf_w) static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { @@ -2975,39 +2973,39 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) -GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwsub_wv_h) +GEN_VEXT_VV_ENV(vfwsub_wv_w) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) -GEN_VEXT_VF(vfwsub_wf_h, 2, 4) -GEN_VEXT_VF(vfwsub_wf_w, 4, 8) +GEN_VEXT_VF(vfwsub_wf_h) +GEN_VEXT_VF(vfwsub_wf_w) /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) -GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmul_vv_h) +GEN_VEXT_VV_ENV(vfmul_vv_w) +GEN_VEXT_VV_ENV(vfmul_vv_d) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) -GEN_VEXT_VF(vfmul_vf_h, 2, 2) -GEN_VEXT_VF(vfmul_vf_w, 4, 4) -GEN_VEXT_VF(vfmul_vf_d, 8, 8) +GEN_VEXT_VF(vfmul_vf_h) +GEN_VEXT_VF(vfmul_vf_w) +GEN_VEXT_VF(vfmul_vf_d) RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) -GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfdiv_vv_h) +GEN_VEXT_VV_ENV(vfdiv_vv_w) +GEN_VEXT_VV_ENV(vfdiv_vv_d) RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) -GEN_VEXT_VF(vfdiv_vf_h, 2, 2) -GEN_VEXT_VF(vfdiv_vf_w, 4, 4) -GEN_VEXT_VF(vfdiv_vf_d, 8, 8) +GEN_VEXT_VF(vfdiv_vf_h) +GEN_VEXT_VF(vfdiv_vf_w) +GEN_VEXT_VF(vfdiv_vf_d) static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) { @@ -3027,9 +3025,9 @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) -GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) -GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) -GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) +GEN_VEXT_VF(vfrdiv_vf_h) +GEN_VEXT_VF(vfrdiv_vf_w) +GEN_VEXT_VF(vfrdiv_vf_d) /* Vector Widening Floating-Point Multiply */ static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) @@ -3046,12 +3044,12 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) -GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmul_vv_h) +GEN_VEXT_VV_ENV(vfwmul_vv_w) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) -GEN_VEXT_VF(vfwmul_vf_h, 2, 4) -GEN_VEXT_VF(vfwmul_vf_w, 4, 8) +GEN_VEXT_VF(vfwmul_vf_h) +GEN_VEXT_VF(vfwmul_vf_w) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -3082,9 +3080,9 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) -GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmacc_vv_h) +GEN_VEXT_VV_ENV(vfmacc_vv_w) +GEN_VEXT_VV_ENV(vfmacc_vv_d) #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -3098,9 +3096,9 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) -GEN_VEXT_VF(vfmacc_vf_h, 2, 2) -GEN_VEXT_VF(vfmacc_vf_w, 4, 4) -GEN_VEXT_VF(vfmacc_vf_d, 8, 8) +GEN_VEXT_VF(vfmacc_vf_h) +GEN_VEXT_VF(vfmacc_vf_w) +GEN_VEXT_VF(vfmacc_vf_d) static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3123,15 +3121,15 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) -GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmacc_vv_h) +GEN_VEXT_VV_ENV(vfnmacc_vv_w) +GEN_VEXT_VV_ENV(vfnmacc_vv_d) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) -GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) -GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) -GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) +GEN_VEXT_VF(vfnmacc_vf_h) +GEN_VEXT_VF(vfnmacc_vf_w) +GEN_VEXT_VF(vfnmacc_vf_d) static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3151,15 +3149,15 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) -GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmsac_vv_h) +GEN_VEXT_VV_ENV(vfmsac_vv_w) +GEN_VEXT_VV_ENV(vfmsac_vv_d) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) -GEN_VEXT_VF(vfmsac_vf_h, 2, 2) -GEN_VEXT_VF(vfmsac_vf_w, 4, 4) -GEN_VEXT_VF(vfmsac_vf_d, 8, 8) +GEN_VEXT_VF(vfmsac_vf_h) +GEN_VEXT_VF(vfmsac_vf_w) +GEN_VEXT_VF(vfmsac_vf_d) static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3179,15 +3177,15 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) -GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmsac_vv_h) +GEN_VEXT_VV_ENV(vfnmsac_vv_w) +GEN_VEXT_VV_ENV(vfnmsac_vv_d) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) -GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) -GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) -GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) +GEN_VEXT_VF(vfnmsac_vf_h) +GEN_VEXT_VF(vfnmsac_vf_w) +GEN_VEXT_VF(vfnmsac_vf_d) static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3207,15 +3205,15 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) -GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmadd_vv_h) +GEN_VEXT_VV_ENV(vfmadd_vv_w) +GEN_VEXT_VV_ENV(vfmadd_vv_d) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) -GEN_VEXT_VF(vfmadd_vf_h, 2, 2) -GEN_VEXT_VF(vfmadd_vf_w, 4, 4) -GEN_VEXT_VF(vfmadd_vf_d, 8, 8) +GEN_VEXT_VF(vfmadd_vf_h) +GEN_VEXT_VF(vfmadd_vf_w) +GEN_VEXT_VF(vfmadd_vf_d) static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3238,15 +3236,15 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) -GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmadd_vv_h) +GEN_VEXT_VV_ENV(vfnmadd_vv_w) +GEN_VEXT_VV_ENV(vfnmadd_vv_d) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) -GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) -GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) -GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) +GEN_VEXT_VF(vfnmadd_vf_h) +GEN_VEXT_VF(vfnmadd_vf_w) +GEN_VEXT_VF(vfnmadd_vf_d) static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3266,15 +3264,15 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) -GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmsub_vv_h) +GEN_VEXT_VV_ENV(vfmsub_vv_w) +GEN_VEXT_VV_ENV(vfmsub_vv_d) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) -GEN_VEXT_VF(vfmsub_vf_h, 2, 2) -GEN_VEXT_VF(vfmsub_vf_w, 4, 4) -GEN_VEXT_VF(vfmsub_vf_d, 8, 8) +GEN_VEXT_VF(vfmsub_vf_h) +GEN_VEXT_VF(vfmsub_vf_w) +GEN_VEXT_VF(vfmsub_vf_d) static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3294,15 +3292,15 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) -GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmsub_vv_h) +GEN_VEXT_VV_ENV(vfnmsub_vv_w) +GEN_VEXT_VV_ENV(vfnmsub_vv_d) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) -GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) -GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) -GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) +GEN_VEXT_VF(vfnmsub_vf_h) +GEN_VEXT_VF(vfnmsub_vf_w) +GEN_VEXT_VF(vfnmsub_vf_d) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) @@ -3319,12 +3317,12 @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) -GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmacc_vv_h) +GEN_VEXT_VV_ENV(vfwmacc_vv_w) RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) -GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) -GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) +GEN_VEXT_VF(vfwmacc_vf_h) +GEN_VEXT_VF(vfwmacc_vf_w) static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -3342,12 +3340,12 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) -GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) -GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) -GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) +GEN_VEXT_VF(vfwnmacc_vf_h) +GEN_VEXT_VF(vfwnmacc_vf_w) static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -3365,12 +3363,12 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) -GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmsac_vv_h) +GEN_VEXT_VV_ENV(vfwmsac_vv_w) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) -GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) -GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) +GEN_VEXT_VF(vfwmsac_vf_h) +GEN_VEXT_VF(vfwmsac_vf_w) static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -3388,12 +3386,12 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) -GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) -GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) -GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) +GEN_VEXT_VF(vfwnmsac_vf_h) +GEN_VEXT_VF(vfwnmsac_vf_w) /* Vector Floating-Point Square-Root Instruction */ /* (TD, T2, TX2) */ @@ -3409,7 +3407,7 @@ static void do_##NAME(void *vd, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ } -#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_V_ENV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ @@ -3432,9 +3430,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) -GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) -GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) -GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) +GEN_VEXT_V_ENV(vfsqrt_v_h) +GEN_VEXT_V_ENV(vfsqrt_v_w) +GEN_VEXT_V_ENV(vfsqrt_v_d) /* * Vector Floating-Point Reciprocal Square-Root Estimate Instruction @@ -3614,9 +3612,9 @@ static float64 frsqrt7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) -GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) -GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) -GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) +GEN_VEXT_V_ENV(vfrsqrt7_v_h) +GEN_VEXT_V_ENV(vfrsqrt7_v_w) +GEN_VEXT_V_ENV(vfrsqrt7_v_d) /* * Vector Floating-Point Reciprocal Estimate Instruction @@ -3805,36 +3803,36 @@ static float64 frec7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) -GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) -GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) -GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) +GEN_VEXT_V_ENV(vfrec7_v_h) +GEN_VEXT_V_ENV(vfrec7_v_w) +GEN_VEXT_V_ENV(vfrec7_v_d) /* Vector Floating-Point MIN/MAX Instructions */ RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) -GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmin_vv_h) +GEN_VEXT_VV_ENV(vfmin_vv_w) +GEN_VEXT_VV_ENV(vfmin_vv_d) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) -GEN_VEXT_VF(vfmin_vf_h, 2, 2) -GEN_VEXT_VF(vfmin_vf_w, 4, 4) -GEN_VEXT_VF(vfmin_vf_d, 8, 8) +GEN_VEXT_VF(vfmin_vf_h) +GEN_VEXT_VF(vfmin_vf_w) +GEN_VEXT_VF(vfmin_vf_d) RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) -GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmax_vv_h) +GEN_VEXT_VV_ENV(vfmax_vv_w) +GEN_VEXT_VV_ENV(vfmax_vv_d) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) -GEN_VEXT_VF(vfmax_vf_h, 2, 2) -GEN_VEXT_VF(vfmax_vf_w, 4, 4) -GEN_VEXT_VF(vfmax_vf_d, 8, 8) +GEN_VEXT_VF(vfmax_vf_h) +GEN_VEXT_VF(vfmax_vf_w) +GEN_VEXT_VF(vfmax_vf_d) /* Vector Floating-Point Sign-Injection Instructions */ static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) @@ -3855,15 +3853,15 @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) -GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnj_vv_h) +GEN_VEXT_VV_ENV(vfsgnj_vv_w) +GEN_VEXT_VV_ENV(vfsgnj_vv_d) RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) -GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnj_vf_h) +GEN_VEXT_VF(vfsgnj_vf_w) +GEN_VEXT_VF(vfsgnj_vf_d) static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) { @@ -3883,15 +3881,15 @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) -GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d) RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) -GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnjn_vf_h) +GEN_VEXT_VF(vfsgnjn_vf_w) +GEN_VEXT_VF(vfsgnjn_vf_d) static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) { @@ -3911,15 +3909,15 @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) -GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d) RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) -GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnjx_vf_h) +GEN_VEXT_VF(vfsgnjx_vf_w) +GEN_VEXT_VF(vfsgnjx_vf_d) /* Vector Floating-Point Compare Instructions */ #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ @@ -4063,7 +4061,7 @@ static void do_##NAME(void *vd, void *vs2, int i) \ *((TD *)vd + HD(i)) = OP(s2); \ } -#define GEN_VEXT_V(NAME, ESZ, DSZ) \ +#define GEN_VEXT_V(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ @@ -4140,9 +4138,9 @@ target_ulong fclass_d(uint64_t frs1) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) -GEN_VEXT_V(vfclass_v_h, 2, 2) -GEN_VEXT_V(vfclass_v_w, 4, 4) -GEN_VEXT_V(vfclass_v_d, 8, 8) +GEN_VEXT_V(vfclass_v_h) +GEN_VEXT_V(vfclass_v_w) +GEN_VEXT_V(vfclass_v_d) /* Vector Floating-Point Merge Instruction */ #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ @@ -4170,33 +4168,33 @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) -GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d) /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d) /* Widening Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4206,30 +4204,30 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) /* * vfwcvt.f.f.v vd, vs2, vm @@ -4242,8 +4240,8 @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) /* Narrowing Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4254,29 +4252,29 @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) -GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) -GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b) +GEN_VEXT_V_ENV(vfncvt_x_f_w_h) +GEN_VEXT_V_ENV(vfncvt_x_f_w_w) /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_x_w_h) +GEN_VEXT_V_ENV(vfncvt_f_x_w_w) /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ static uint16_t vfncvtffv16(uint32_t a, float_status *s) @@ -4286,8 +4284,8 @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_f_w_h) +GEN_VEXT_V_ENV(vfncvt_f_f_w_w) /* *** Vector Reduction Operations From patchwork Wed Mar 9 08:34:29 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611070 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT2mL356pz9sFk for ; Wed, 30 Mar 2022 21:31:46 +1100 (AEDT) Received: from localhost ([::1]:44126 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVc7-0000j6-UK for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:31:43 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44272) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVH-0004rn-Kr; Wed, 30 Mar 2022 06:24:41 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37016) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVF-00074h-Gt; Wed, 30 Mar 2022 06:24:39 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 060A211EF78; Wed, 30 Mar 2022 10:24:35 +0000 (UTC) From: ~eopxd Date: Wed, 09 Mar 2022 00:34:29 -0800 Subject: [PATCH qemu v7 02/14] target/riscv: rvv: Rename ambiguous esz Message-ID: <164863587444.17401.9965527486691250478-2@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD No functional change intended in this commit. Signed-off-by: eop Chen Reviewed-by: Frank Chang Reviewed-by: Alistair Francis --- target/riscv/vector_helper.c | 76 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index e94caf1a3c..d0452a7756 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -125,9 +125,9 @@ static inline int32_t vext_lmul(uint32_t desc) /* * Get the maximum number of elements can be operated. * - * esz: log2 of element size in bytes. + * log2_esz: log2 of element size in bytes. */ -static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) +static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) { /* * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. @@ -136,7 +136,7 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) uint32_t vlenb = simd_maxsz(desc); /* Return VLMAX */ - int scale = vext_lmul(desc) - esz; + int scale = vext_lmul(desc) - log2_esz; return scale < 0 ? vlenb >> -scale : vlenb << scale; } @@ -231,11 +231,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, CPURISCVState *env, uint32_t desc, uint32_t vm, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k; uint32_t nf = vext_nf(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); for (i = env->vstart; i < env->vl; i++, env->vstart++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -244,7 +244,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, k = 0; while (k < nf) { - target_ulong addr = base + stride * i + (k << esz); + target_ulong addr = base + stride * i + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -289,18 +289,18 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) /* unmasked unit-stride load and store operation*/ static void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k; uint32_t nf = vext_nf(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); /* load bytes from guest memory */ for (i = env->vstart; i < evl; i++, env->vstart++) { k = 0; while (k < nf) { - target_ulong addr = base + ((i * nf + k) << esz); + target_ulong addr = base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -399,12 +399,12 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, void *vs2, CPURISCVState *env, uint32_t desc, vext_get_index_addr get_index_addr, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k; uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); /* load bytes from guest memory */ for (i = env->vstart; i < env->vl; i++, env->vstart++) { @@ -414,7 +414,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, k = 0; while (k < nf) { - abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); + abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -480,13 +480,13 @@ static inline void vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra) + uint32_t log2_esz, uintptr_t ra) { void *host; uint32_t i, k, vl = 0; uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); target_ulong addr, offset, remain; /* probe every access*/ @@ -494,12 +494,12 @@ vext_ldff(void *vd, void *v0, target_ulong base, if (!vm && !vext_elem_mask(v0, i)) { continue; } - addr = adjust_addr(env, base + i * (nf << esz)); + addr = adjust_addr(env, base + i * (nf << log2_esz)); if (i == 0) { - probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD); + probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); } else { /* if it triggers an exception, no need to check watchpoint */ - remain = nf << esz; + remain = nf << log2_esz; while (remain > 0) { offset = -(addr | TARGET_PAGE_MASK); host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, @@ -536,7 +536,7 @@ ProbeSuccess: continue; } while (k < nf) { - target_ulong addr = base + ((i * nf + k) << esz); + target_ulong addr = base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -576,13 +576,13 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) */ static void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k, off, pos; uint32_t nf = vext_nf(desc); uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; - uint32_t max_elems = vlenb >> esz; + uint32_t max_elems = vlenb >> log2_esz; k = env->vstart / max_elems; off = env->vstart % max_elems; @@ -590,7 +590,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, if (off) { /* load/store rest of elements of current segment pointed by vstart */ for (pos = off; pos < max_elems; pos++, env->vstart++) { - target_ulong addr = base + ((pos + k * max_elems) << esz); + target_ulong addr = base + ((pos + k * max_elems) << log2_esz); ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); } k++; @@ -599,7 +599,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, /* load/store elements for rest of segments */ for (; k < nf; k++) { for (i = 0; i < max_elems; i++, env->vstart++) { - target_ulong addr = base + ((i + k * max_elems) << esz); + target_ulong addr = base + ((i + k * max_elems) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); } } @@ -4691,11 +4691,11 @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) -#define GEN_VEXT_VSLIE1UP(ESZ, H) \ -static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ +#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ +static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ { \ - typedef uint##ESZ##_t ETYPE; \ + typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t i; \ @@ -4718,11 +4718,11 @@ GEN_VEXT_VSLIE1UP(16, H2) GEN_VEXT_VSLIE1UP(32, H4) GEN_VEXT_VSLIE1UP(64, H8) -#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ @@ -4731,11 +4731,11 @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) -#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ -static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ +#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ +static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ { \ - typedef uint##ESZ##_t ETYPE; \ + typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t i; \ @@ -4758,11 +4758,11 @@ GEN_VEXT_VSLIDE1DOWN(16, H2) GEN_VEXT_VSLIDE1DOWN(32, H4) GEN_VEXT_VSLIDE1DOWN(64, H8) -#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ @@ -4772,11 +4772,11 @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) /* Vector Floating-Point Slide Instructions */ -#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ @@ -4784,11 +4784,11 @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) -#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ From patchwork Sat Mar 12 06:28:22 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611072 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT2xF66X8z9sFk for ; Wed, 30 Mar 2022 21:39:29 +1100 (AEDT) Received: from localhost ([::1]:57554 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVjb-0001Tz-Tg for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:39:27 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44270) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVH-0004rm-I0; Wed, 30 Mar 2022 06:24:41 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37018) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVF-00074n-HW; Wed, 30 Mar 2022 06:24:39 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 251CD11EF99; Wed, 30 Mar 2022 10:24:35 +0000 (UTC) From: ~eopxd Date: Fri, 11 Mar 2022 22:28:22 -0800 Subject: [PATCH qemu v7 03/14] target/riscv: rvv: Early exit when vstart >= vl MIME-Version: 1.0 Message-ID: <164863587444.17401.9965527486691250478-3@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD According to v-spec (section 5.4): When vstart ≥ vl, there are no body elements, and no elements are updated in any destination vector register group, including that no tail elements are updated with agnostic values. vmsbf.m, vmsif.m, vmsof.m, viota.m, vcompress instructions themselves require vstart to be zero. So they don't need the early exit. Signed-off-by: eop Chen Reviewed-by: Frank Chang Acked-by: Alistair Francis --- target/riscv/insn_trans/trans_rvv.c.inc | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 275fded6e4..57953923d5 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -652,6 +652,7 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -818,6 +819,7 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -925,6 +927,7 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -1067,6 +1070,7 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -1216,6 +1220,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, } tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); if (a->vm && s->vl_eq_vlmax) { gvec_fn(s->sew, vreg_ofs(s, a->rd), @@ -1263,6 +1268,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -1427,6 +1433,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -1513,6 +1520,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, uint32_t data = 0; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -1593,6 +1601,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, uint32_t data = 0; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -1670,6 +1679,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -1851,6 +1861,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2061,6 +2072,7 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) }; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), cpu_env, s->cfg_ptr->vlen / 8, @@ -2084,6 +2096,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) TCGv s1; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = get_gpr(s, a->rs1, EXT_SIGN); @@ -2139,6 +2152,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) }; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = tcg_constant_i64(simm); dest = tcg_temp_new_ptr(); @@ -2291,6 +2305,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2321,6 +2336,7 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -2409,6 +2425,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2483,6 +2500,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2604,6 +2622,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, TCGLabel *over = gen_new_label(); gen_set_rm(s, rm); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -2717,6 +2736,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) }; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); t1 = tcg_temp_new_i64(); /* NaN-box f[rs1] */ @@ -2805,6 +2825,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2856,6 +2877,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -2921,6 +2943,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2974,6 +2997,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -3061,6 +3085,7 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ TCGLabel *over = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -3229,6 +3254,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) uint32_t data = 0; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -3746,6 +3772,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) gen_helper_gvec_3_ptr *fn; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); static gen_helper_gvec_3_ptr * const fns[6][4] = { { From patchwork Tue Mar 1 09:07:38 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611078 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT34R25dnz9sFk for ; Wed, 30 Mar 2022 21:45:41 +1100 (AEDT) Received: from localhost ([::1]:45308 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVpa-00045u-K4 for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:45:38 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44332) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVJ-0004tb-W0; Wed, 30 Mar 2022 06:24:42 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37020) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVF-00074s-Ht; Wed, 30 Mar 2022 06:24:41 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 4E1C511EFBD; Wed, 30 Mar 2022 10:24:35 +0000 (UTC) From: ~eopxd Date: Tue, 01 Mar 2022 01:07:38 -0800 Subject: [PATCH qemu v7 04/14] target/riscv: rvv: Add tail agnostic for vv instructions Message-ID: <164863587444.17401.9965527486691250478-4@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD This is the first commit regarding the tail agnostic behavior. Added option 'rvv_ta_all_1s' to enable the behavior, the option is default to false. Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/cpu.c | 1 + target/riscv/cpu.h | 2 + target/riscv/cpu_helper.c | 2 + target/riscv/insn_trans/trans_rvv.c.inc | 11 + target/riscv/internals.h | 5 +- target/riscv/translate.c | 2 + target/riscv/vector_helper.c | 315 ++++++++++++++---------- 7 files changed, 207 insertions(+), 131 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index ddda4906ff..cd4cf4b41e 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false), DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), + DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index c069fe85fa..8c4a79b5a0 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -369,6 +369,7 @@ struct RISCVCPUConfig { bool ext_zhinxmin; bool ext_zve32f; bool ext_zve64f; + bool rvv_ta_all_1s; /* Vendor-specific custom extensions */ bool ext_XVentanaCondOps; @@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2) /* If PointerMasking should be applied */ FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1) FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1) +FIELD(TB_FLAGS, VTA, 24, 1) #ifdef TARGET_RISCV32 #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 1c60fb2e80..2941c88c31 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, flags = FIELD_DP32(flags, TB_FLAGS, LMUL, FIELD_EX64(env->vtype, VTYPE, VLMUL)); flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); + flags = FIELD_DP32(flags, TB_FLAGS, VTA, + FIELD_EX64(env->vtype, VTYPE, VTA)); } else { flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); } diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 57953923d5..cc80bf00ff 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1223,6 +1223,16 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); if (a->vm && s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), MAXSZ(s), MAXSZ(s)); @@ -1231,6 +1241,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, diff --git a/target/riscv/internals.h b/target/riscv/internals.h index dbb322bfa7..512c6c30cf 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -24,8 +24,9 @@ /* share data between vector helpers and decode code */ FIELD(VDATA, VM, 0, 1) FIELD(VDATA, LMUL, 1, 3) -FIELD(VDATA, NF, 4, 4) -FIELD(VDATA, WD, 4, 1) +FIELD(VDATA, VTA, 4, 1) +FIELD(VDATA, NF, 5, 4) +FIELD(VDATA, WD, 5, 1) /* float point classify helpers */ target_ulong fclass_h(uint64_t frs1); diff --git a/target/riscv/translate.c b/target/riscv/translate.c index fac998a6b5..7775dade26 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -94,6 +94,7 @@ typedef struct DisasContext { */ int8_t lmul; uint8_t sew; + uint8_t vta; target_ulong vstart; bool vl_eq_vlmax; uint8_t ntemp; @@ -1083,6 +1084,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); + ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s; ctx->vstart = env->vstart; ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); ctx->misa_mxl_max = env->misa_mxl_max; diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index d0452a7756..ea1c409e16 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -122,6 +122,11 @@ static inline int32_t vext_lmul(uint32_t desc) return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); } +static inline uint32_t vext_vta(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VTA); +} + /* * Get the maximum number of elements can be operated. * @@ -140,6 +145,19 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) return scale < 0 ? vlenb >> -scale : vlenb << scale; } +/* + * Get number of total elements, including prestart, body and tail elements. + * Note that when LMUL < 1, the tail includes the elements past VLMAX that + * are held in the same vector register. + */ +static inline uint32_t vext_get_total_elems(uint32_t desc, uint32_t esz) +{ + uint32_t vlenb = simd_maxsz(desc); + int8_t lmul = vext_lmul(desc) < 0 ? 0 : vext_lmul(desc); + + return (vlenb << lmul) / esz; +} + static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) { return (addr & env->cur_pmmask) | env->cur_pmbase; @@ -172,6 +190,33 @@ static void probe_pages(CPURISCVState *env, target_ulong addr, } } +static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, + uint32_t tot) +{ + if (is_agnostic == 0) { + /* policy undisturbed */ + return; + } + if (tot - cnt == 0) { + return ; + } + memset(base, -1, tot - cnt); +} + +/* set agnostic elements to 1s */ +#define GEN_SET_ELEMS_1S(SET_ELEMS_1S_FN, ETYPE, H) \ +static void SET_ELEMS_1S_FN(void *vd, uint32_t is_agnostic, uint32_t idx, \ + uint32_t cnt, uint32_t tot) \ +{ \ + ETYPE *cur = ((ETYPE *)vd + H(idx)); \ + vext_set_elems_1s(cur, is_agnostic, cnt, tot); \ +} + +GEN_SET_ELEMS_1S(vext_set_elems_1s_b, int8_t, H1) +GEN_SET_ELEMS_1S(vext_set_elems_1s_h, int16_t, H2) +GEN_SET_ELEMS_1S(vext_set_elems_1s_w, int32_t, H4) +GEN_SET_ELEMS_1S(vext_set_elems_1s_d, int64_t, H8) + static inline void vext_set_elem_mask(void *v0, int index, uint8_t value) { @@ -197,6 +242,14 @@ static inline int vext_elem_mask(void *v0, int index) typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, uint32_t idx, void *vd, uintptr_t retaddr); +/* set bytes to all 1s for agnostic elements */ +typedef void vext_set_elems_1s_fn(void *vd, uint32_t vta, uint32_t idx, + uint32_t cnt, uint32_t tot); +static vext_set_elems_1s_fn *vext_set_elems_1s_fns[4] = { + vext_set_elems_1s_b, vext_set_elems_1s_h, + vext_set_elems_1s_w, vext_set_elems_1s_d +}; + #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ static void NAME(CPURISCVState *env, abi_ptr addr, \ uint32_t idx, void *vd, uintptr_t retaddr)\ @@ -710,10 +763,12 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_fn *fn) + opivv2_fn *fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(desc, esz); + uint32_t vta = vext_vta(desc); uint32_t i; for (i = env->vstart; i < vl; i++) { @@ -723,26 +778,28 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, fn(vd, vs1, vs2, i); } env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, total_elems * esz); } /* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME) \ +#define GEN_VEXT_VV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vv(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } -GEN_VEXT_VV(vadd_vv_b) -GEN_VEXT_VV(vadd_vv_h) -GEN_VEXT_VV(vadd_vv_w) -GEN_VEXT_VV(vadd_vv_d) -GEN_VEXT_VV(vsub_vv_b) -GEN_VEXT_VV(vsub_vv_h) -GEN_VEXT_VV(vsub_vv_w) -GEN_VEXT_VV(vsub_vv_d) +GEN_VEXT_VV(vadd_vv_b, 1) +GEN_VEXT_VV(vadd_vv_h, 2) +GEN_VEXT_VV(vadd_vv_w, 4) +GEN_VEXT_VV(vadd_vv_d, 8) +GEN_VEXT_VV(vsub_vv_b, 1) +GEN_VEXT_VV(vsub_vv_h, 2) +GEN_VEXT_VV(vsub_vv_w, 4) +GEN_VEXT_VV(vsub_vv_d, 8) typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); @@ -887,30 +944,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) -GEN_VEXT_VV(vwaddu_vv_b) -GEN_VEXT_VV(vwaddu_vv_h) -GEN_VEXT_VV(vwaddu_vv_w) -GEN_VEXT_VV(vwsubu_vv_b) -GEN_VEXT_VV(vwsubu_vv_h) -GEN_VEXT_VV(vwsubu_vv_w) -GEN_VEXT_VV(vwadd_vv_b) -GEN_VEXT_VV(vwadd_vv_h) -GEN_VEXT_VV(vwadd_vv_w) -GEN_VEXT_VV(vwsub_vv_b) -GEN_VEXT_VV(vwsub_vv_h) -GEN_VEXT_VV(vwsub_vv_w) -GEN_VEXT_VV(vwaddu_wv_b) -GEN_VEXT_VV(vwaddu_wv_h) -GEN_VEXT_VV(vwaddu_wv_w) -GEN_VEXT_VV(vwsubu_wv_b) -GEN_VEXT_VV(vwsubu_wv_h) -GEN_VEXT_VV(vwsubu_wv_w) -GEN_VEXT_VV(vwadd_wv_b) -GEN_VEXT_VV(vwadd_wv_h) -GEN_VEXT_VV(vwadd_wv_w) -GEN_VEXT_VV(vwsub_wv_b) -GEN_VEXT_VV(vwsub_wv_h) -GEN_VEXT_VV(vwsub_wv_w) +GEN_VEXT_VV(vwaddu_vv_b, 2) +GEN_VEXT_VV(vwaddu_vv_h, 4) +GEN_VEXT_VV(vwaddu_vv_w, 8) +GEN_VEXT_VV(vwsubu_vv_b, 2) +GEN_VEXT_VV(vwsubu_vv_h, 4) +GEN_VEXT_VV(vwsubu_vv_w, 8) +GEN_VEXT_VV(vwadd_vv_b, 2) +GEN_VEXT_VV(vwadd_vv_h, 4) +GEN_VEXT_VV(vwadd_vv_w, 8) +GEN_VEXT_VV(vwsub_vv_b, 2) +GEN_VEXT_VV(vwsub_vv_h, 4) +GEN_VEXT_VV(vwsub_vv_w, 8) +GEN_VEXT_VV(vwaddu_wv_b, 2) +GEN_VEXT_VV(vwaddu_wv_h, 4) +GEN_VEXT_VV(vwaddu_wv_w, 8) +GEN_VEXT_VV(vwsubu_wv_b, 2) +GEN_VEXT_VV(vwsubu_wv_h, 4) +GEN_VEXT_VV(vwsubu_wv_w, 8) +GEN_VEXT_VV(vwadd_wv_b, 2) +GEN_VEXT_VV(vwadd_wv_h, 4) +GEN_VEXT_VV(vwadd_wv_w, 8) +GEN_VEXT_VV(vwsub_wv_b, 2) +GEN_VEXT_VV(vwsub_wv_h, 4) +GEN_VEXT_VV(vwsub_wv_w, 8) RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) @@ -1089,18 +1146,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) -GEN_VEXT_VV(vand_vv_b) -GEN_VEXT_VV(vand_vv_h) -GEN_VEXT_VV(vand_vv_w) -GEN_VEXT_VV(vand_vv_d) -GEN_VEXT_VV(vor_vv_b) -GEN_VEXT_VV(vor_vv_h) -GEN_VEXT_VV(vor_vv_w) -GEN_VEXT_VV(vor_vv_d) -GEN_VEXT_VV(vxor_vv_b) -GEN_VEXT_VV(vxor_vv_h) -GEN_VEXT_VV(vxor_vv_w) -GEN_VEXT_VV(vxor_vv_d) +GEN_VEXT_VV(vand_vv_b, 1) +GEN_VEXT_VV(vand_vv_h, 2) +GEN_VEXT_VV(vand_vv_w, 4) +GEN_VEXT_VV(vand_vv_d, 8) +GEN_VEXT_VV(vor_vv_b, 1) +GEN_VEXT_VV(vor_vv_h, 2) +GEN_VEXT_VV(vor_vv_w, 4) +GEN_VEXT_VV(vor_vv_d, 8) +GEN_VEXT_VV(vxor_vv_b, 1) +GEN_VEXT_VV(vxor_vv_h, 2) +GEN_VEXT_VV(vxor_vv_w, 4) +GEN_VEXT_VV(vxor_vv_d, 8) RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) @@ -1346,22 +1403,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) -GEN_VEXT_VV(vminu_vv_b) -GEN_VEXT_VV(vminu_vv_h) -GEN_VEXT_VV(vminu_vv_w) -GEN_VEXT_VV(vminu_vv_d) -GEN_VEXT_VV(vmin_vv_b) -GEN_VEXT_VV(vmin_vv_h) -GEN_VEXT_VV(vmin_vv_w) -GEN_VEXT_VV(vmin_vv_d) -GEN_VEXT_VV(vmaxu_vv_b) -GEN_VEXT_VV(vmaxu_vv_h) -GEN_VEXT_VV(vmaxu_vv_w) -GEN_VEXT_VV(vmaxu_vv_d) -GEN_VEXT_VV(vmax_vv_b) -GEN_VEXT_VV(vmax_vv_h) -GEN_VEXT_VV(vmax_vv_w) -GEN_VEXT_VV(vmax_vv_d) +GEN_VEXT_VV(vminu_vv_b, 1) +GEN_VEXT_VV(vminu_vv_h, 2) +GEN_VEXT_VV(vminu_vv_w, 4) +GEN_VEXT_VV(vminu_vv_d, 8) +GEN_VEXT_VV(vmin_vv_b, 1) +GEN_VEXT_VV(vmin_vv_h, 2) +GEN_VEXT_VV(vmin_vv_w, 4) +GEN_VEXT_VV(vmin_vv_d, 8) +GEN_VEXT_VV(vmaxu_vv_b, 1) +GEN_VEXT_VV(vmaxu_vv_h, 2) +GEN_VEXT_VV(vmaxu_vv_w, 4) +GEN_VEXT_VV(vmaxu_vv_d, 8) +GEN_VEXT_VV(vmax_vv_b, 1) +GEN_VEXT_VV(vmax_vv_h, 2) +GEN_VEXT_VV(vmax_vv_w, 4) +GEN_VEXT_VV(vmax_vv_d, 8) RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) @@ -1402,10 +1459,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) -GEN_VEXT_VV(vmul_vv_b) -GEN_VEXT_VV(vmul_vv_h) -GEN_VEXT_VV(vmul_vv_w) -GEN_VEXT_VV(vmul_vv_d) +GEN_VEXT_VV(vmul_vv_b, 1) +GEN_VEXT_VV(vmul_vv_h, 2) +GEN_VEXT_VV(vmul_vv_w, 4) +GEN_VEXT_VV(vmul_vv_d, 8) static int8_t do_mulh_b(int8_t s2, int8_t s1) { @@ -1509,18 +1566,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) -GEN_VEXT_VV(vmulh_vv_b) -GEN_VEXT_VV(vmulh_vv_h) -GEN_VEXT_VV(vmulh_vv_w) -GEN_VEXT_VV(vmulh_vv_d) -GEN_VEXT_VV(vmulhu_vv_b) -GEN_VEXT_VV(vmulhu_vv_h) -GEN_VEXT_VV(vmulhu_vv_w) -GEN_VEXT_VV(vmulhu_vv_d) -GEN_VEXT_VV(vmulhsu_vv_b) -GEN_VEXT_VV(vmulhsu_vv_h) -GEN_VEXT_VV(vmulhsu_vv_w) -GEN_VEXT_VV(vmulhsu_vv_d) +GEN_VEXT_VV(vmulh_vv_b, 1) +GEN_VEXT_VV(vmulh_vv_h, 2) +GEN_VEXT_VV(vmulh_vv_w, 4) +GEN_VEXT_VV(vmulh_vv_d, 8) +GEN_VEXT_VV(vmulhu_vv_b, 1) +GEN_VEXT_VV(vmulhu_vv_h, 2) +GEN_VEXT_VV(vmulhu_vv_w, 4) +GEN_VEXT_VV(vmulhu_vv_d, 8) +GEN_VEXT_VV(vmulhsu_vv_b, 1) +GEN_VEXT_VV(vmulhsu_vv_h, 2) +GEN_VEXT_VV(vmulhsu_vv_w, 4) +GEN_VEXT_VV(vmulhsu_vv_d, 8) RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) @@ -1579,22 +1636,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) -GEN_VEXT_VV(vdivu_vv_b) -GEN_VEXT_VV(vdivu_vv_h) -GEN_VEXT_VV(vdivu_vv_w) -GEN_VEXT_VV(vdivu_vv_d) -GEN_VEXT_VV(vdiv_vv_b) -GEN_VEXT_VV(vdiv_vv_h) -GEN_VEXT_VV(vdiv_vv_w) -GEN_VEXT_VV(vdiv_vv_d) -GEN_VEXT_VV(vremu_vv_b) -GEN_VEXT_VV(vremu_vv_h) -GEN_VEXT_VV(vremu_vv_w) -GEN_VEXT_VV(vremu_vv_d) -GEN_VEXT_VV(vrem_vv_b) -GEN_VEXT_VV(vrem_vv_h) -GEN_VEXT_VV(vrem_vv_w) -GEN_VEXT_VV(vrem_vv_d) +GEN_VEXT_VV(vdivu_vv_b, 1) +GEN_VEXT_VV(vdivu_vv_h, 2) +GEN_VEXT_VV(vdivu_vv_w, 4) +GEN_VEXT_VV(vdivu_vv_d, 8) +GEN_VEXT_VV(vdiv_vv_b, 1) +GEN_VEXT_VV(vdiv_vv_h, 2) +GEN_VEXT_VV(vdiv_vv_w, 4) +GEN_VEXT_VV(vdiv_vv_d, 8) +GEN_VEXT_VV(vremu_vv_b, 1) +GEN_VEXT_VV(vremu_vv_h, 2) +GEN_VEXT_VV(vremu_vv_w, 4) +GEN_VEXT_VV(vremu_vv_d, 8) +GEN_VEXT_VV(vrem_vv_b, 1) +GEN_VEXT_VV(vrem_vv_h, 2) +GEN_VEXT_VV(vrem_vv_w, 4) +GEN_VEXT_VV(vrem_vv_d, 8) RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) @@ -1639,15 +1696,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) -GEN_VEXT_VV(vwmul_vv_b) -GEN_VEXT_VV(vwmul_vv_h) -GEN_VEXT_VV(vwmul_vv_w) -GEN_VEXT_VV(vwmulu_vv_b) -GEN_VEXT_VV(vwmulu_vv_h) -GEN_VEXT_VV(vwmulu_vv_w) -GEN_VEXT_VV(vwmulsu_vv_b) -GEN_VEXT_VV(vwmulsu_vv_h) -GEN_VEXT_VV(vwmulsu_vv_w) +GEN_VEXT_VV(vwmul_vv_b, 2) +GEN_VEXT_VV(vwmul_vv_h, 4) +GEN_VEXT_VV(vwmul_vv_w, 8) +GEN_VEXT_VV(vwmulu_vv_b, 2) +GEN_VEXT_VV(vwmulu_vv_h, 4) +GEN_VEXT_VV(vwmulu_vv_w, 8) +GEN_VEXT_VV(vwmulsu_vv_b, 2) +GEN_VEXT_VV(vwmulsu_vv_h, 4) +GEN_VEXT_VV(vwmulsu_vv_w, 8) RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) @@ -1698,22 +1755,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) -GEN_VEXT_VV(vmacc_vv_b) -GEN_VEXT_VV(vmacc_vv_h) -GEN_VEXT_VV(vmacc_vv_w) -GEN_VEXT_VV(vmacc_vv_d) -GEN_VEXT_VV(vnmsac_vv_b) -GEN_VEXT_VV(vnmsac_vv_h) -GEN_VEXT_VV(vnmsac_vv_w) -GEN_VEXT_VV(vnmsac_vv_d) -GEN_VEXT_VV(vmadd_vv_b) -GEN_VEXT_VV(vmadd_vv_h) -GEN_VEXT_VV(vmadd_vv_w) -GEN_VEXT_VV(vmadd_vv_d) -GEN_VEXT_VV(vnmsub_vv_b) -GEN_VEXT_VV(vnmsub_vv_h) -GEN_VEXT_VV(vnmsub_vv_w) -GEN_VEXT_VV(vnmsub_vv_d) +GEN_VEXT_VV(vmacc_vv_b, 1) +GEN_VEXT_VV(vmacc_vv_h, 2) +GEN_VEXT_VV(vmacc_vv_w, 4) +GEN_VEXT_VV(vmacc_vv_d, 8) +GEN_VEXT_VV(vnmsac_vv_b, 1) +GEN_VEXT_VV(vnmsac_vv_h, 2) +GEN_VEXT_VV(vnmsac_vv_w, 4) +GEN_VEXT_VV(vnmsac_vv_d, 8) +GEN_VEXT_VV(vmadd_vv_b, 1) +GEN_VEXT_VV(vmadd_vv_h, 2) +GEN_VEXT_VV(vmadd_vv_w, 4) +GEN_VEXT_VV(vmadd_vv_d, 8) +GEN_VEXT_VV(vnmsub_vv_b, 1) +GEN_VEXT_VV(vnmsub_vv_h, 2) +GEN_VEXT_VV(vnmsub_vv_w, 4) +GEN_VEXT_VV(vnmsub_vv_d, 8) #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ @@ -1766,15 +1823,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) -GEN_VEXT_VV(vwmaccu_vv_b) -GEN_VEXT_VV(vwmaccu_vv_h) -GEN_VEXT_VV(vwmaccu_vv_w) -GEN_VEXT_VV(vwmacc_vv_b) -GEN_VEXT_VV(vwmacc_vv_h) -GEN_VEXT_VV(vwmacc_vv_w) -GEN_VEXT_VV(vwmaccsu_vv_b) -GEN_VEXT_VV(vwmaccsu_vv_h) -GEN_VEXT_VV(vwmaccsu_vv_w) +GEN_VEXT_VV(vwmaccu_vv_b, 2) +GEN_VEXT_VV(vwmaccu_vv_h, 4) +GEN_VEXT_VV(vwmaccu_vv_w, 8) +GEN_VEXT_VV(vwmacc_vv_b, 2) +GEN_VEXT_VV(vwmacc_vv_h, 4) +GEN_VEXT_VV(vwmacc_vv_w, 8) +GEN_VEXT_VV(vwmaccsu_vv_b, 2) +GEN_VEXT_VV(vwmaccsu_vv_h, 4) +GEN_VEXT_VV(vwmaccsu_vv_w, 8) RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) From patchwork Mon Mar 7 07:10:25 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611084 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT38w0rTpz9sFk for ; Wed, 30 Mar 2022 21:49:36 +1100 (AEDT) Received: from localhost ([::1]:52934 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVtN-0001OP-Ka for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:49:33 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44378) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVL-0004w9-65; Wed, 30 Mar 2022 06:24:43 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37022) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVI-00075J-2m; Wed, 30 Mar 2022 06:24:42 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 71EB511EFCD; Wed, 30 Mar 2022 10:24:35 +0000 (UTC) From: ~eopxd Date: Sun, 06 Mar 2022 23:10:25 -0800 Subject: [PATCH qemu v7 05/14] target/riscv: rvv: Add tail agnostic for vector load / store instructions Message-ID: <164863587444.17401.9965527486691250478-5@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 9 ++++++++ target/riscv/vector_helper.c | 28 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index cc80bf00ff..66cfc8c603 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -711,6 +711,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); } @@ -748,6 +749,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, true); } @@ -774,6 +776,7 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew) /* EMUL = 1, NFIELDS = 1 */ data = FIELD_DP32(data, VDATA, LMUL, 0); data = FIELD_DP32(data, VDATA, NF, 1); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); } @@ -791,6 +794,7 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew) /* EMUL = 1, NFIELDS = 1 */ data = FIELD_DP32(data, VDATA, LMUL, 0); data = FIELD_DP32(data, VDATA, NF, 1); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, true); } @@ -862,6 +866,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); } @@ -891,6 +896,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); fn = fns[eew]; if (fn == NULL) { return false; @@ -991,6 +997,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); } @@ -1043,6 +1050,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true); } @@ -1108,6 +1116,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldff_trans(a->rd, a->rs1, data, fn, s); } diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index ea1c409e16..a72b59554e 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -289,6 +289,8 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, uint32_t i, k; uint32_t nf = vext_nf(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t vta = vext_vta(desc); for (i = env->vstart; i < env->vl; i++, env->vstart++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -303,6 +305,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); + } } #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ @@ -348,6 +355,8 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, uint32_t i, k; uint32_t nf = vext_nf(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t vta = vext_vta(desc); /* load bytes from guest memory */ for (i = env->vstart; i < evl; i++, env->vstart++) { @@ -359,6 +368,11 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); + } } /* @@ -458,6 +472,8 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t vta = vext_vta(desc); /* load bytes from guest memory */ for (i = env->vstart; i < env->vl; i++, env->vstart++) { @@ -473,6 +489,11 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); + } } #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ @@ -540,6 +561,8 @@ vext_ldff(void *vd, void *v0, target_ulong base, uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t vta = vext_vta(desc); target_ulong addr, offset, remain; /* probe every access*/ @@ -595,6 +618,11 @@ ProbeSuccess: } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); + } } #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ From patchwork Mon Mar 7 07:32:57 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611075 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT2y90Fm5z9sFk for ; Wed, 30 Mar 2022 21:40:15 +1100 (AEDT) Received: from localhost ([::1]:59464 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVkK-0002ll-BR for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:40:12 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44392) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVL-0004xo-Tj; Wed, 30 Mar 2022 06:24:44 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37024) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVI-00075K-3x; Wed, 30 Mar 2022 06:24:43 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 9580F11EFFA; Wed, 30 Mar 2022 10:24:35 +0000 (UTC) From: ~eopxd Date: Sun, 06 Mar 2022 23:32:57 -0800 Subject: [PATCH qemu v7 06/14] target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions Message-ID: <164863587444.17401.9965527486691250478-6@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 25 ++ target/riscv/vector_helper.c | 310 +++++++++++++----------- 2 files changed, 197 insertions(+), 138 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 66cfc8c603..8dad3fa038 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1297,6 +1297,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, data = FIELD_DP32(data, VDATA, VM, vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); @@ -1333,6 +1334,16 @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, } if (a->vm && s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } TCGv_i64 src1 = tcg_temp_new_i64(); tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN)); @@ -1462,6 +1473,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, data = FIELD_DP32(data, VDATA, VM, vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); @@ -1491,6 +1503,16 @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, } if (a->vm && s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); mark_vs_dirty(s); @@ -1544,6 +1566,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -1625,6 +1648,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -1703,6 +1727,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index a72b59554e..61ab0ce358 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -857,10 +857,12 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_fn fn) + opivx2_fn fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(desc, esz); + uint32_t vta = vext_vta(desc); uint32_t i; for (i = env->vstart; i < vl; i++) { @@ -870,30 +872,32 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, fn(vd, s1, vs2, i); } env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, total_elems * esz); } /* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME) \ +#define GEN_VEXT_VX(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vx(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ -} - -GEN_VEXT_VX(vadd_vx_b) -GEN_VEXT_VX(vadd_vx_h) -GEN_VEXT_VX(vadd_vx_w) -GEN_VEXT_VX(vadd_vx_d) -GEN_VEXT_VX(vsub_vx_b) -GEN_VEXT_VX(vsub_vx_h) -GEN_VEXT_VX(vsub_vx_w) -GEN_VEXT_VX(vsub_vx_d) -GEN_VEXT_VX(vrsub_vx_b) -GEN_VEXT_VX(vrsub_vx_h) -GEN_VEXT_VX(vrsub_vx_w) -GEN_VEXT_VX(vrsub_vx_d) + do_##NAME, ESZ); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1) +GEN_VEXT_VX(vadd_vx_h, 2) +GEN_VEXT_VX(vadd_vx_w, 4) +GEN_VEXT_VX(vadd_vx_d, 8) +GEN_VEXT_VX(vsub_vx_b, 1) +GEN_VEXT_VX(vsub_vx_h, 2) +GEN_VEXT_VX(vsub_vx_w, 4) +GEN_VEXT_VX(vsub_vx_d, 8) +GEN_VEXT_VX(vrsub_vx_b, 1) +GEN_VEXT_VX(vrsub_vx_h, 2) +GEN_VEXT_VX(vrsub_vx_w, 4) +GEN_VEXT_VX(vrsub_vx_d, 8) void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) { @@ -1021,30 +1025,30 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) -GEN_VEXT_VX(vwaddu_vx_b) -GEN_VEXT_VX(vwaddu_vx_h) -GEN_VEXT_VX(vwaddu_vx_w) -GEN_VEXT_VX(vwsubu_vx_b) -GEN_VEXT_VX(vwsubu_vx_h) -GEN_VEXT_VX(vwsubu_vx_w) -GEN_VEXT_VX(vwadd_vx_b) -GEN_VEXT_VX(vwadd_vx_h) -GEN_VEXT_VX(vwadd_vx_w) -GEN_VEXT_VX(vwsub_vx_b) -GEN_VEXT_VX(vwsub_vx_h) -GEN_VEXT_VX(vwsub_vx_w) -GEN_VEXT_VX(vwaddu_wx_b) -GEN_VEXT_VX(vwaddu_wx_h) -GEN_VEXT_VX(vwaddu_wx_w) -GEN_VEXT_VX(vwsubu_wx_b) -GEN_VEXT_VX(vwsubu_wx_h) -GEN_VEXT_VX(vwsubu_wx_w) -GEN_VEXT_VX(vwadd_wx_b) -GEN_VEXT_VX(vwadd_wx_h) -GEN_VEXT_VX(vwadd_wx_w) -GEN_VEXT_VX(vwsub_wx_b) -GEN_VEXT_VX(vwsub_wx_h) -GEN_VEXT_VX(vwsub_wx_w) +GEN_VEXT_VX(vwaddu_vx_b, 2) +GEN_VEXT_VX(vwaddu_vx_h, 4) +GEN_VEXT_VX(vwaddu_vx_w, 8) +GEN_VEXT_VX(vwsubu_vx_b, 2) +GEN_VEXT_VX(vwsubu_vx_h, 4) +GEN_VEXT_VX(vwsubu_vx_w, 8) +GEN_VEXT_VX(vwadd_vx_b, 2) +GEN_VEXT_VX(vwadd_vx_h, 4) +GEN_VEXT_VX(vwadd_vx_w, 8) +GEN_VEXT_VX(vwsub_vx_b, 2) +GEN_VEXT_VX(vwsub_vx_h, 4) +GEN_VEXT_VX(vwsub_vx_w, 8) +GEN_VEXT_VX(vwaddu_wx_b, 2) +GEN_VEXT_VX(vwaddu_wx_h, 4) +GEN_VEXT_VX(vwaddu_wx_w, 8) +GEN_VEXT_VX(vwsubu_wx_b, 2) +GEN_VEXT_VX(vwsubu_wx_h, 4) +GEN_VEXT_VX(vwsubu_wx_w, 8) +GEN_VEXT_VX(vwadd_wx_b, 2) +GEN_VEXT_VX(vwadd_wx_h, 4) +GEN_VEXT_VX(vwadd_wx_w, 8) +GEN_VEXT_VX(vwsub_wx_b, 2) +GEN_VEXT_VX(vwsub_wx_h, 4) +GEN_VEXT_VX(vwsub_wx_w, 8) /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ #define DO_VADC(N, M, C) (N + M + C) @@ -1055,6 +1059,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1065,6 +1072,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) @@ -1082,6 +1092,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1091,6 +1104,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) @@ -1113,6 +1129,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1122,6 +1141,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) @@ -1140,6 +1165,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1149,6 +1177,12 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) @@ -1199,18 +1233,18 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) -GEN_VEXT_VX(vand_vx_b) -GEN_VEXT_VX(vand_vx_h) -GEN_VEXT_VX(vand_vx_w) -GEN_VEXT_VX(vand_vx_d) -GEN_VEXT_VX(vor_vx_b) -GEN_VEXT_VX(vor_vx_h) -GEN_VEXT_VX(vor_vx_w) -GEN_VEXT_VX(vor_vx_d) -GEN_VEXT_VX(vxor_vx_b) -GEN_VEXT_VX(vxor_vx_h) -GEN_VEXT_VX(vxor_vx_w) -GEN_VEXT_VX(vxor_vx_d) +GEN_VEXT_VX(vand_vx_b, 1) +GEN_VEXT_VX(vand_vx_h, 2) +GEN_VEXT_VX(vand_vx_w, 4) +GEN_VEXT_VX(vand_vx_d, 8) +GEN_VEXT_VX(vor_vx_b, 1) +GEN_VEXT_VX(vor_vx_h, 2) +GEN_VEXT_VX(vor_vx_w, 4) +GEN_VEXT_VX(vor_vx_d, 8) +GEN_VEXT_VX(vxor_vx_b, 1) +GEN_VEXT_VX(vxor_vx_h, 2) +GEN_VEXT_VX(vxor_vx_w, 4) +GEN_VEXT_VX(vxor_vx_d, 8) /* Vector Single-Width Bit Shift Instructions */ #define DO_SLL(N, M) (N << (M)) @@ -1464,22 +1498,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) -GEN_VEXT_VX(vminu_vx_b) -GEN_VEXT_VX(vminu_vx_h) -GEN_VEXT_VX(vminu_vx_w) -GEN_VEXT_VX(vminu_vx_d) -GEN_VEXT_VX(vmin_vx_b) -GEN_VEXT_VX(vmin_vx_h) -GEN_VEXT_VX(vmin_vx_w) -GEN_VEXT_VX(vmin_vx_d) -GEN_VEXT_VX(vmaxu_vx_b) -GEN_VEXT_VX(vmaxu_vx_h) -GEN_VEXT_VX(vmaxu_vx_w) -GEN_VEXT_VX(vmaxu_vx_d) -GEN_VEXT_VX(vmax_vx_b) -GEN_VEXT_VX(vmax_vx_h) -GEN_VEXT_VX(vmax_vx_w) -GEN_VEXT_VX(vmax_vx_d) +GEN_VEXT_VX(vminu_vx_b, 1) +GEN_VEXT_VX(vminu_vx_h, 2) +GEN_VEXT_VX(vminu_vx_w, 4) +GEN_VEXT_VX(vminu_vx_d, 8) +GEN_VEXT_VX(vmin_vx_b, 1) +GEN_VEXT_VX(vmin_vx_h, 2) +GEN_VEXT_VX(vmin_vx_w, 4) +GEN_VEXT_VX(vmin_vx_d, 8) +GEN_VEXT_VX(vmaxu_vx_b, 1) +GEN_VEXT_VX(vmaxu_vx_h, 2) +GEN_VEXT_VX(vmaxu_vx_w, 4) +GEN_VEXT_VX(vmaxu_vx_d, 8) +GEN_VEXT_VX(vmax_vx_b, 1) +GEN_VEXT_VX(vmax_vx_h, 2) +GEN_VEXT_VX(vmax_vx_w, 4) +GEN_VEXT_VX(vmax_vx_d, 8) /* Vector Single-Width Integer Multiply Instructions */ #define DO_MUL(N, M) (N * M) @@ -1623,22 +1657,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) -GEN_VEXT_VX(vmul_vx_b) -GEN_VEXT_VX(vmul_vx_h) -GEN_VEXT_VX(vmul_vx_w) -GEN_VEXT_VX(vmul_vx_d) -GEN_VEXT_VX(vmulh_vx_b) -GEN_VEXT_VX(vmulh_vx_h) -GEN_VEXT_VX(vmulh_vx_w) -GEN_VEXT_VX(vmulh_vx_d) -GEN_VEXT_VX(vmulhu_vx_b) -GEN_VEXT_VX(vmulhu_vx_h) -GEN_VEXT_VX(vmulhu_vx_w) -GEN_VEXT_VX(vmulhu_vx_d) -GEN_VEXT_VX(vmulhsu_vx_b) -GEN_VEXT_VX(vmulhsu_vx_h) -GEN_VEXT_VX(vmulhsu_vx_w) -GEN_VEXT_VX(vmulhsu_vx_d) +GEN_VEXT_VX(vmul_vx_b, 1) +GEN_VEXT_VX(vmul_vx_h, 2) +GEN_VEXT_VX(vmul_vx_w, 4) +GEN_VEXT_VX(vmul_vx_d, 8) +GEN_VEXT_VX(vmulh_vx_b, 1) +GEN_VEXT_VX(vmulh_vx_h, 2) +GEN_VEXT_VX(vmulh_vx_w, 4) +GEN_VEXT_VX(vmulh_vx_d, 8) +GEN_VEXT_VX(vmulhu_vx_b, 1) +GEN_VEXT_VX(vmulhu_vx_h, 2) +GEN_VEXT_VX(vmulhu_vx_w, 4) +GEN_VEXT_VX(vmulhu_vx_d, 8) +GEN_VEXT_VX(vmulhsu_vx_b, 1) +GEN_VEXT_VX(vmulhsu_vx_h, 2) +GEN_VEXT_VX(vmulhsu_vx_w, 4) +GEN_VEXT_VX(vmulhsu_vx_d, 8) /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) @@ -1697,22 +1731,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) -GEN_VEXT_VX(vdivu_vx_b) -GEN_VEXT_VX(vdivu_vx_h) -GEN_VEXT_VX(vdivu_vx_w) -GEN_VEXT_VX(vdivu_vx_d) -GEN_VEXT_VX(vdiv_vx_b) -GEN_VEXT_VX(vdiv_vx_h) -GEN_VEXT_VX(vdiv_vx_w) -GEN_VEXT_VX(vdiv_vx_d) -GEN_VEXT_VX(vremu_vx_b) -GEN_VEXT_VX(vremu_vx_h) -GEN_VEXT_VX(vremu_vx_w) -GEN_VEXT_VX(vremu_vx_d) -GEN_VEXT_VX(vrem_vx_b) -GEN_VEXT_VX(vrem_vx_h) -GEN_VEXT_VX(vrem_vx_w) -GEN_VEXT_VX(vrem_vx_d) +GEN_VEXT_VX(vdivu_vx_b, 1) +GEN_VEXT_VX(vdivu_vx_h, 2) +GEN_VEXT_VX(vdivu_vx_w, 4) +GEN_VEXT_VX(vdivu_vx_d, 8) +GEN_VEXT_VX(vdiv_vx_b, 1) +GEN_VEXT_VX(vdiv_vx_h, 2) +GEN_VEXT_VX(vdiv_vx_w, 4) +GEN_VEXT_VX(vdiv_vx_d, 8) +GEN_VEXT_VX(vremu_vx_b, 1) +GEN_VEXT_VX(vremu_vx_h, 2) +GEN_VEXT_VX(vremu_vx_w, 4) +GEN_VEXT_VX(vremu_vx_d, 8) +GEN_VEXT_VX(vrem_vx_b, 1) +GEN_VEXT_VX(vrem_vx_h, 2) +GEN_VEXT_VX(vrem_vx_w, 4) +GEN_VEXT_VX(vrem_vx_d, 8) /* Vector Widening Integer Multiply Instructions */ RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) @@ -1743,15 +1777,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) -GEN_VEXT_VX(vwmul_vx_b) -GEN_VEXT_VX(vwmul_vx_h) -GEN_VEXT_VX(vwmul_vx_w) -GEN_VEXT_VX(vwmulu_vx_b) -GEN_VEXT_VX(vwmulu_vx_h) -GEN_VEXT_VX(vwmulu_vx_w) -GEN_VEXT_VX(vwmulsu_vx_b) -GEN_VEXT_VX(vwmulsu_vx_h) -GEN_VEXT_VX(vwmulsu_vx_w) +GEN_VEXT_VX(vwmul_vx_b, 2) +GEN_VEXT_VX(vwmul_vx_h, 4) +GEN_VEXT_VX(vwmul_vx_w, 8) +GEN_VEXT_VX(vwmulu_vx_b, 2) +GEN_VEXT_VX(vwmulu_vx_h, 4) +GEN_VEXT_VX(vwmulu_vx_w, 8) +GEN_VEXT_VX(vwmulsu_vx_b, 2) +GEN_VEXT_VX(vwmulsu_vx_h, 4) +GEN_VEXT_VX(vwmulsu_vx_w, 8) /* Vector Single-Width Integer Multiply-Add Instructions */ #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -1824,22 +1858,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) -GEN_VEXT_VX(vmacc_vx_b) -GEN_VEXT_VX(vmacc_vx_h) -GEN_VEXT_VX(vmacc_vx_w) -GEN_VEXT_VX(vmacc_vx_d) -GEN_VEXT_VX(vnmsac_vx_b) -GEN_VEXT_VX(vnmsac_vx_h) -GEN_VEXT_VX(vnmsac_vx_w) -GEN_VEXT_VX(vnmsac_vx_d) -GEN_VEXT_VX(vmadd_vx_b) -GEN_VEXT_VX(vmadd_vx_h) -GEN_VEXT_VX(vmadd_vx_w) -GEN_VEXT_VX(vmadd_vx_d) -GEN_VEXT_VX(vnmsub_vx_b) -GEN_VEXT_VX(vnmsub_vx_h) -GEN_VEXT_VX(vnmsub_vx_w) -GEN_VEXT_VX(vnmsub_vx_d) +GEN_VEXT_VX(vmacc_vx_b, 1) +GEN_VEXT_VX(vmacc_vx_h, 2) +GEN_VEXT_VX(vmacc_vx_w, 4) +GEN_VEXT_VX(vmacc_vx_d, 8) +GEN_VEXT_VX(vnmsac_vx_b, 1) +GEN_VEXT_VX(vnmsac_vx_h, 2) +GEN_VEXT_VX(vnmsac_vx_w, 4) +GEN_VEXT_VX(vnmsac_vx_d, 8) +GEN_VEXT_VX(vmadd_vx_b, 1) +GEN_VEXT_VX(vmadd_vx_h, 2) +GEN_VEXT_VX(vmadd_vx_w, 4) +GEN_VEXT_VX(vmadd_vx_d, 8) +GEN_VEXT_VX(vnmsub_vx_b, 1) +GEN_VEXT_VX(vnmsub_vx_h, 2) +GEN_VEXT_VX(vnmsub_vx_w, 4) +GEN_VEXT_VX(vnmsub_vx_d, 8) /* Vector Widening Integer Multiply-Add Instructions */ RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) @@ -1873,18 +1907,18 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) -GEN_VEXT_VX(vwmaccu_vx_b) -GEN_VEXT_VX(vwmaccu_vx_h) -GEN_VEXT_VX(vwmaccu_vx_w) -GEN_VEXT_VX(vwmacc_vx_b) -GEN_VEXT_VX(vwmacc_vx_h) -GEN_VEXT_VX(vwmacc_vx_w) -GEN_VEXT_VX(vwmaccsu_vx_b) -GEN_VEXT_VX(vwmaccsu_vx_h) -GEN_VEXT_VX(vwmaccsu_vx_w) -GEN_VEXT_VX(vwmaccus_vx_b) -GEN_VEXT_VX(vwmaccus_vx_h) -GEN_VEXT_VX(vwmaccus_vx_w) +GEN_VEXT_VX(vwmaccu_vx_b, 2) +GEN_VEXT_VX(vwmaccu_vx_h, 4) +GEN_VEXT_VX(vwmaccu_vx_w, 8) +GEN_VEXT_VX(vwmacc_vx_b, 2) +GEN_VEXT_VX(vwmacc_vx_h, 4) +GEN_VEXT_VX(vwmacc_vx_w, 8) +GEN_VEXT_VX(vwmaccsu_vx_b, 2) +GEN_VEXT_VX(vwmaccsu_vx_h, 4) +GEN_VEXT_VX(vwmaccsu_vx_w, 8) +GEN_VEXT_VX(vwmaccus_vx_b, 2) +GEN_VEXT_VX(vwmaccus_vx_h, 4) +GEN_VEXT_VX(vwmaccus_vx_w, 8) /* Vector Integer Merge and Move Instructions */ #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ From patchwork Mon Mar 7 09:38:18 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611083 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT3660Tvwz9sFq for ; Wed, 30 Mar 2022 21:47:10 +1100 (AEDT) Received: from localhost ([::1]:48800 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVr2-0006fj-4O for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:47:08 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44346) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVK-0004uN-BV; Wed, 30 Mar 2022 06:24:42 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37026) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVI-00075P-2l; Wed, 30 Mar 2022 06:24:41 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id C75AC11EFFB; Wed, 30 Mar 2022 10:24:35 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 01:38:18 -0800 Subject: [PATCH qemu v7 07/14] target/riscv: rvv: Add tail agnostic for vector integer shift instructions Message-ID: <164863587444.17401.9965527486691250478-7@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 11 +++++++++++ target/riscv/vector_helper.c | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 8dad3fa038..fabb2a78ae 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1852,6 +1852,16 @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, } if (a->vm && s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } TCGv_i32 src1 = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE)); @@ -1910,6 +1920,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 61ab0ce358..1462a664c6 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1257,6 +1257,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS1); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1268,6 +1271,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) @@ -1292,6 +1298,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1302,6 +1311,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) From patchwork Mon Mar 7 09:43:53 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611071 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT2v15ymyz9sFk for ; Wed, 30 Mar 2022 21:37:33 +1100 (AEDT) Received: from localhost ([::1]:53250 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVhj-0006z2-TF for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:37:31 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44384) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVL-0004wP-Ai; Wed, 30 Mar 2022 06:24:43 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37028) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVI-00075R-4D; Wed, 30 Mar 2022 06:24:43 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id E3BA011F085; Wed, 30 Mar 2022 10:24:35 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 01:43:53 -0800 Subject: [PATCH qemu v7 08/14] target/riscv: rvv: Add tail agnostic for vector integer comparison instructions Message-ID: <164863587444.17401.9965527486691250478-8@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 1462a664c6..ed72fffb71 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1358,6 +1358,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1369,6 +1372,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) @@ -1407,6 +1416,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1418,6 +1430,12 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ DO_OP(s2, (ETYPE)(target_long)s1)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) From patchwork Mon Mar 7 09:53:23 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611077 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT32q2KBZz9sFk for ; Wed, 30 Mar 2022 21:44:19 +1100 (AEDT) Received: from localhost ([::1]:40484 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVoH-0000qU-Cl for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:44:17 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44404) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVM-0004za-NR; Wed, 30 Mar 2022 06:24:44 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37034) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVK-000761-1g; Wed, 30 Mar 2022 06:24:44 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 1CBE411F089; Wed, 30 Mar 2022 10:24:36 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 01:53:23 -0800 Subject: [PATCH qemu v7 09/14] target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions Message-ID: <164863587444.17401.9965527486691250478-9@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 44 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 24 ++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index fabb2a78ae..54c08991f7 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2117,11 +2117,22 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) /* vmv.v.v has rs2 = 0 and vm = 1 */ vext_check_sss(s, a->rd, a->rs1, 0, 1)) { if (s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), MAXSZ(s), MAXSZ(s)); } else { uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] = { gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, @@ -2157,6 +2168,16 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) s1 = get_gpr(s, a->rs1, EXT_SIGN); if (s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), MAXSZ(s), MAXSZ(s), s1); } else { @@ -2164,6 +2185,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) TCGv_i64 s1_i64 = tcg_temp_new_i64(); TCGv_ptr dest = tcg_temp_new_ptr(); uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] = { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -2194,6 +2216,16 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) vext_check_ss(s, a->rd, 0, 1)) { int64_t simm = sextract64(a->rs1, 0, 5); if (s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd), MAXSZ(s), MAXSZ(s), simm); mark_vs_dirty(s); @@ -2202,6 +2234,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) TCGv_i64 s1; TCGv_ptr dest; uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] = { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -2774,6 +2807,16 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) TCGv_i64 t1; if (s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } t1 = tcg_temp_new_i64(); /* NaN-box f[rs1] */ do_nanbox(s, t1, cpu_fpr[a->rs1]); @@ -2785,6 +2828,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) TCGv_ptr dest; TCGv_i32 desc; uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[3] = { gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index ed72fffb71..bd0aeda03f 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1956,6 +1956,9 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -1963,6 +1966,9 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ *((ETYPE *)vd + H(i)) = s1; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) @@ -1975,12 +1981,18 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) @@ -1993,6 +2005,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -2000,6 +2015,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) @@ -2012,6 +2030,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -2021,6 +2042,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ *((ETYPE *)vd + H(i)) = d; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) From patchwork Mon Mar 7 10:04:21 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611087 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT3Gt1rBdz9sFk for ; Wed, 30 Mar 2022 21:54:46 +1100 (AEDT) Received: from localhost ([::1]:33188 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVyO-0007TS-AE for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:54:44 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44646) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVn-0005GC-60; Wed, 30 Mar 2022 06:25:12 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37030) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVe-000760-08; Wed, 30 Mar 2022 06:25:06 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 3F6B711F08C; Wed, 30 Mar 2022 10:24:36 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 02:04:21 -0800 Subject: [PATCH qemu v7 10/14] target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions Message-ID: <164863587444.17401.9965527486691250478-10@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 220 ++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 106 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index bd0aeda03f..057d67211b 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2094,10 +2094,12 @@ static inline void vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_rm_fn *fn) + opivv2_rm_fn *fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(desc, esz); + uint32_t vta = vext_vta(desc); switch (env->vxrm) { case 0: /* rnu */ @@ -2117,15 +2119,17 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, total_elems * esz); } /* generate helpers for fixed point instructions with OPIVV format */ -#define GEN_VEXT_VV_RM(NAME) \ +#define GEN_VEXT_VV_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) @@ -2175,10 +2179,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) -GEN_VEXT_VV_RM(vsaddu_vv_b) -GEN_VEXT_VV_RM(vsaddu_vv_h) -GEN_VEXT_VV_RM(vsaddu_vv_w) -GEN_VEXT_VV_RM(vsaddu_vv_d) +GEN_VEXT_VV_RM(vsaddu_vv_b, 1) +GEN_VEXT_VV_RM(vsaddu_vv_h, 2) +GEN_VEXT_VV_RM(vsaddu_vv_w, 4) +GEN_VEXT_VV_RM(vsaddu_vv_d, 8) typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, CPURISCVState *env, int vxrm); @@ -2211,10 +2215,12 @@ static inline void vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_rm_fn *fn) + opivx2_rm_fn *fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(desc, esz); + uint32_t vta = vext_vta(desc); switch (env->vxrm) { case 0: /* rnu */ @@ -2234,25 +2240,27 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, total_elems * esz); } /* generate helpers for fixed point instructions with OPIVX format */ -#define GEN_VEXT_VX_RM(NAME) \ +#define GEN_VEXT_VX_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) -GEN_VEXT_VX_RM(vsaddu_vx_b) -GEN_VEXT_VX_RM(vsaddu_vx_h) -GEN_VEXT_VX_RM(vsaddu_vx_w) -GEN_VEXT_VX_RM(vsaddu_vx_d) +GEN_VEXT_VX_RM(vsaddu_vx_b, 1) +GEN_VEXT_VX_RM(vsaddu_vx_h, 2) +GEN_VEXT_VX_RM(vsaddu_vx_w, 4) +GEN_VEXT_VX_RM(vsaddu_vx_d, 8) static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -2298,19 +2306,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) -GEN_VEXT_VV_RM(vsadd_vv_b) -GEN_VEXT_VV_RM(vsadd_vv_h) -GEN_VEXT_VV_RM(vsadd_vv_w) -GEN_VEXT_VV_RM(vsadd_vv_d) +GEN_VEXT_VV_RM(vsadd_vv_b, 1) +GEN_VEXT_VV_RM(vsadd_vv_h, 2) +GEN_VEXT_VV_RM(vsadd_vv_w, 4) +GEN_VEXT_VV_RM(vsadd_vv_d, 8) RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) -GEN_VEXT_VX_RM(vsadd_vx_b) -GEN_VEXT_VX_RM(vsadd_vx_h) -GEN_VEXT_VX_RM(vsadd_vx_w) -GEN_VEXT_VX_RM(vsadd_vx_d) +GEN_VEXT_VX_RM(vsadd_vx_b, 1) +GEN_VEXT_VX_RM(vsadd_vx_h, 2) +GEN_VEXT_VX_RM(vsadd_vx_w, 4) +GEN_VEXT_VX_RM(vsadd_vx_d, 8) static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) { @@ -2359,19 +2367,19 @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) -GEN_VEXT_VV_RM(vssubu_vv_b) -GEN_VEXT_VV_RM(vssubu_vv_h) -GEN_VEXT_VV_RM(vssubu_vv_w) -GEN_VEXT_VV_RM(vssubu_vv_d) +GEN_VEXT_VV_RM(vssubu_vv_b, 1) +GEN_VEXT_VV_RM(vssubu_vv_h, 2) +GEN_VEXT_VV_RM(vssubu_vv_w, 4) +GEN_VEXT_VV_RM(vssubu_vv_d, 8) RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) -GEN_VEXT_VX_RM(vssubu_vx_b) -GEN_VEXT_VX_RM(vssubu_vx_h) -GEN_VEXT_VX_RM(vssubu_vx_w) -GEN_VEXT_VX_RM(vssubu_vx_d) +GEN_VEXT_VX_RM(vssubu_vx_b, 1) +GEN_VEXT_VX_RM(vssubu_vx_h, 2) +GEN_VEXT_VX_RM(vssubu_vx_w, 4) +GEN_VEXT_VX_RM(vssubu_vx_d, 8) static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -2417,19 +2425,19 @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) -GEN_VEXT_VV_RM(vssub_vv_b) -GEN_VEXT_VV_RM(vssub_vv_h) -GEN_VEXT_VV_RM(vssub_vv_w) -GEN_VEXT_VV_RM(vssub_vv_d) +GEN_VEXT_VV_RM(vssub_vv_b, 1) +GEN_VEXT_VV_RM(vssub_vv_h, 2) +GEN_VEXT_VV_RM(vssub_vv_w, 4) +GEN_VEXT_VV_RM(vssub_vv_d, 8) RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) -GEN_VEXT_VX_RM(vssub_vx_b) -GEN_VEXT_VX_RM(vssub_vx_h) -GEN_VEXT_VX_RM(vssub_vx_w) -GEN_VEXT_VX_RM(vssub_vx_d) +GEN_VEXT_VX_RM(vssub_vx_b, 1) +GEN_VEXT_VX_RM(vssub_vx_h, 2) +GEN_VEXT_VX_RM(vssub_vx_w, 4) +GEN_VEXT_VX_RM(vssub_vx_d, 8) /* Vector Single-Width Averaging Add and Subtract */ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) @@ -2481,19 +2489,19 @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) -GEN_VEXT_VV_RM(vaadd_vv_b) -GEN_VEXT_VV_RM(vaadd_vv_h) -GEN_VEXT_VV_RM(vaadd_vv_w) -GEN_VEXT_VV_RM(vaadd_vv_d) +GEN_VEXT_VV_RM(vaadd_vv_b, 1) +GEN_VEXT_VV_RM(vaadd_vv_h, 2) +GEN_VEXT_VV_RM(vaadd_vv_w, 4) +GEN_VEXT_VV_RM(vaadd_vv_d, 8) RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) -GEN_VEXT_VX_RM(vaadd_vx_b) -GEN_VEXT_VX_RM(vaadd_vx_h) -GEN_VEXT_VX_RM(vaadd_vx_w) -GEN_VEXT_VX_RM(vaadd_vx_d) +GEN_VEXT_VX_RM(vaadd_vx_b, 1) +GEN_VEXT_VX_RM(vaadd_vx_h, 2) +GEN_VEXT_VX_RM(vaadd_vx_w, 4) +GEN_VEXT_VX_RM(vaadd_vx_d, 8) static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2518,19 +2526,19 @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) -GEN_VEXT_VV_RM(vaaddu_vv_b) -GEN_VEXT_VV_RM(vaaddu_vv_h) -GEN_VEXT_VV_RM(vaaddu_vv_w) -GEN_VEXT_VV_RM(vaaddu_vv_d) +GEN_VEXT_VV_RM(vaaddu_vv_b, 1) +GEN_VEXT_VV_RM(vaaddu_vv_h, 2) +GEN_VEXT_VV_RM(vaaddu_vv_w, 4) +GEN_VEXT_VV_RM(vaaddu_vv_d, 8) RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) -GEN_VEXT_VX_RM(vaaddu_vx_b) -GEN_VEXT_VX_RM(vaaddu_vx_h) -GEN_VEXT_VX_RM(vaaddu_vx_w) -GEN_VEXT_VX_RM(vaaddu_vx_d) +GEN_VEXT_VX_RM(vaaddu_vx_b, 1) +GEN_VEXT_VX_RM(vaaddu_vx_h, 2) +GEN_VEXT_VX_RM(vaaddu_vx_w, 4) +GEN_VEXT_VX_RM(vaaddu_vx_d, 8) static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) { @@ -2554,19 +2562,19 @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) -GEN_VEXT_VV_RM(vasub_vv_b) -GEN_VEXT_VV_RM(vasub_vv_h) -GEN_VEXT_VV_RM(vasub_vv_w) -GEN_VEXT_VV_RM(vasub_vv_d) +GEN_VEXT_VV_RM(vasub_vv_b, 1) +GEN_VEXT_VV_RM(vasub_vv_h, 2) +GEN_VEXT_VV_RM(vasub_vv_w, 4) +GEN_VEXT_VV_RM(vasub_vv_d, 8) RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) -GEN_VEXT_VX_RM(vasub_vx_b) -GEN_VEXT_VX_RM(vasub_vx_h) -GEN_VEXT_VX_RM(vasub_vx_w) -GEN_VEXT_VX_RM(vasub_vx_d) +GEN_VEXT_VX_RM(vasub_vx_b, 1) +GEN_VEXT_VX_RM(vasub_vx_h, 2) +GEN_VEXT_VX_RM(vasub_vx_w, 4) +GEN_VEXT_VX_RM(vasub_vx_d, 8) static inline uint32_t asubu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2591,19 +2599,19 @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) -GEN_VEXT_VV_RM(vasubu_vv_b) -GEN_VEXT_VV_RM(vasubu_vv_h) -GEN_VEXT_VV_RM(vasubu_vv_w) -GEN_VEXT_VV_RM(vasubu_vv_d) +GEN_VEXT_VV_RM(vasubu_vv_b, 1) +GEN_VEXT_VV_RM(vasubu_vv_h, 2) +GEN_VEXT_VV_RM(vasubu_vv_w, 4) +GEN_VEXT_VV_RM(vasubu_vv_d, 8) RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) -GEN_VEXT_VX_RM(vasubu_vx_b) -GEN_VEXT_VX_RM(vasubu_vx_h) -GEN_VEXT_VX_RM(vasubu_vx_w) -GEN_VEXT_VX_RM(vasubu_vx_d) +GEN_VEXT_VX_RM(vasubu_vx_b, 1) +GEN_VEXT_VX_RM(vasubu_vx_h, 2) +GEN_VEXT_VX_RM(vasubu_vx_w, 4) +GEN_VEXT_VX_RM(vasubu_vx_d, 8) /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -2698,19 +2706,19 @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) -GEN_VEXT_VV_RM(vsmul_vv_b) -GEN_VEXT_VV_RM(vsmul_vv_h) -GEN_VEXT_VV_RM(vsmul_vv_w) -GEN_VEXT_VV_RM(vsmul_vv_d) +GEN_VEXT_VV_RM(vsmul_vv_b, 1) +GEN_VEXT_VV_RM(vsmul_vv_h, 2) +GEN_VEXT_VV_RM(vsmul_vv_w, 4) +GEN_VEXT_VV_RM(vsmul_vv_d, 8) RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) -GEN_VEXT_VX_RM(vsmul_vx_b) -GEN_VEXT_VX_RM(vsmul_vx_h) -GEN_VEXT_VX_RM(vsmul_vx_w) -GEN_VEXT_VX_RM(vsmul_vx_d) +GEN_VEXT_VX_RM(vsmul_vx_b, 1) +GEN_VEXT_VX_RM(vsmul_vx_h, 2) +GEN_VEXT_VX_RM(vsmul_vx_w, 4) +GEN_VEXT_VX_RM(vsmul_vx_d, 8) /* Vector Single-Width Scaling Shift Instructions */ static inline uint8_t @@ -2757,19 +2765,19 @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) -GEN_VEXT_VV_RM(vssrl_vv_b) -GEN_VEXT_VV_RM(vssrl_vv_h) -GEN_VEXT_VV_RM(vssrl_vv_w) -GEN_VEXT_VV_RM(vssrl_vv_d) +GEN_VEXT_VV_RM(vssrl_vv_b, 1) +GEN_VEXT_VV_RM(vssrl_vv_h, 2) +GEN_VEXT_VV_RM(vssrl_vv_w, 4) +GEN_VEXT_VV_RM(vssrl_vv_d, 8) RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) -GEN_VEXT_VX_RM(vssrl_vx_b) -GEN_VEXT_VX_RM(vssrl_vx_h) -GEN_VEXT_VX_RM(vssrl_vx_w) -GEN_VEXT_VX_RM(vssrl_vx_d) +GEN_VEXT_VX_RM(vssrl_vx_b, 1) +GEN_VEXT_VX_RM(vssrl_vx_h, 2) +GEN_VEXT_VX_RM(vssrl_vx_w, 4) +GEN_VEXT_VX_RM(vssrl_vx_d, 8) static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -2816,19 +2824,19 @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) -GEN_VEXT_VV_RM(vssra_vv_b) -GEN_VEXT_VV_RM(vssra_vv_h) -GEN_VEXT_VV_RM(vssra_vv_w) -GEN_VEXT_VV_RM(vssra_vv_d) +GEN_VEXT_VV_RM(vssra_vv_b, 1) +GEN_VEXT_VV_RM(vssra_vv_h, 2) +GEN_VEXT_VV_RM(vssra_vv_w, 4) +GEN_VEXT_VV_RM(vssra_vv_d, 8) RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) -GEN_VEXT_VX_RM(vssra_vx_b) -GEN_VEXT_VX_RM(vssra_vx_h) -GEN_VEXT_VX_RM(vssra_vx_w) -GEN_VEXT_VX_RM(vssra_vx_d) +GEN_VEXT_VX_RM(vssra_vx_b, 1) +GEN_VEXT_VX_RM(vssra_vx_h, 2) +GEN_VEXT_VX_RM(vssra_vx_w, 4) +GEN_VEXT_VX_RM(vssra_vx_d, 8) /* Vector Narrowing Fixed-Point Clip Instructions */ static inline int8_t @@ -2891,16 +2899,16 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) -GEN_VEXT_VV_RM(vnclip_wv_b) -GEN_VEXT_VV_RM(vnclip_wv_h) -GEN_VEXT_VV_RM(vnclip_wv_w) +GEN_VEXT_VV_RM(vnclip_wv_b, 1) +GEN_VEXT_VV_RM(vnclip_wv_h, 2) +GEN_VEXT_VV_RM(vnclip_wv_w, 4) RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) -GEN_VEXT_VX_RM(vnclip_wx_b) -GEN_VEXT_VX_RM(vnclip_wx_h) -GEN_VEXT_VX_RM(vnclip_wx_w) +GEN_VEXT_VX_RM(vnclip_wx_b, 1) +GEN_VEXT_VX_RM(vnclip_wx_h, 2) +GEN_VEXT_VX_RM(vnclip_wx_w, 4) static inline uint8_t vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) @@ -2953,16 +2961,16 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) -GEN_VEXT_VV_RM(vnclipu_wv_b) -GEN_VEXT_VV_RM(vnclipu_wv_h) -GEN_VEXT_VV_RM(vnclipu_wv_w) +GEN_VEXT_VV_RM(vnclipu_wv_b, 1) +GEN_VEXT_VV_RM(vnclipu_wv_h, 2) +GEN_VEXT_VV_RM(vnclipu_wv_w, 4) RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) -GEN_VEXT_VX_RM(vnclipu_wx_b) -GEN_VEXT_VX_RM(vnclipu_wx_h) -GEN_VEXT_VX_RM(vnclipu_wx_w) +GEN_VEXT_VX_RM(vnclipu_wx_b, 1) +GEN_VEXT_VX_RM(vnclipu_wx_h, 2) +GEN_VEXT_VX_RM(vnclipu_wx_w, 4) /* *** Vector Float Point Arithmetic Instructions From patchwork Mon Mar 7 10:05:42 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611096 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT3LX1Y8sz9sDX for ; Wed, 30 Mar 2022 21:57:56 +1100 (AEDT) Received: from localhost ([::1]:42544 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZW1S-0005Ma-9n for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:57:54 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44698) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVn-0005Gc-Ew; Wed, 30 Mar 2022 06:25:12 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37036) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVe-00076P-Ir; Wed, 30 Mar 2022 06:25:09 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 6F08011F08E; Wed, 30 Mar 2022 10:24:36 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 02:05:42 -0800 Subject: [PATCH qemu v7 11/14] target/riscv: rvv: Add tail agnostic for vector floating-point instructions Message-ID: <164863587444.17401.9965527486691250478-11@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 11 + target/riscv/vector_helper.c | 447 +++++++++++++----------- 2 files changed, 262 insertions(+), 196 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 54c08991f7..8abe80d3da 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2398,6 +2398,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2480,6 +2481,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_set_rm(s, RISCV_FRM_DYN); \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2518,6 +2520,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2557,6 +2560,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_set_rm(s, RISCV_FRM_DYN); \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2593,6 +2597,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2632,6 +2637,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_set_rm(s, RISCV_FRM_DYN); \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2715,6 +2721,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -2929,6 +2936,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -2980,6 +2988,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -3047,6 +3056,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -3100,6 +3110,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 057d67211b..a85a0c667e 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2985,13 +2985,16 @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ } -#define GEN_VEXT_VV_ENV(NAME) \ +#define GEN_VEXT_VV_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -3001,14 +3004,18 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ do_##NAME(vd, vs1, vs2, i, env); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + total_elems * ESZ); \ } RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) -GEN_VEXT_VV_ENV(vfadd_vv_h) -GEN_VEXT_VV_ENV(vfadd_vv_w) -GEN_VEXT_VV_ENV(vfadd_vv_d) +GEN_VEXT_VV_ENV(vfadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfadd_vv_d, 8) #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -3018,13 +3025,16 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ } -#define GEN_VEXT_VF(NAME) \ +#define GEN_VEXT_VF(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -3034,27 +3044,31 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ do_##NAME(vd, s1, vs2, i, env); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + total_elems * ESZ); \ } RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) -GEN_VEXT_VF(vfadd_vf_h) -GEN_VEXT_VF(vfadd_vf_w) -GEN_VEXT_VF(vfadd_vf_d) +GEN_VEXT_VF(vfadd_vf_h, 2) +GEN_VEXT_VF(vfadd_vf_w, 4) +GEN_VEXT_VF(vfadd_vf_d, 8) RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) -GEN_VEXT_VV_ENV(vfsub_vv_h) -GEN_VEXT_VV_ENV(vfsub_vv_w) -GEN_VEXT_VV_ENV(vfsub_vv_d) +GEN_VEXT_VV_ENV(vfsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfsub_vv_d, 8) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) -GEN_VEXT_VF(vfsub_vf_h) -GEN_VEXT_VF(vfsub_vf_w) -GEN_VEXT_VF(vfsub_vf_d) +GEN_VEXT_VF(vfsub_vf_h, 2) +GEN_VEXT_VF(vfsub_vf_w, 4) +GEN_VEXT_VF(vfsub_vf_d, 8) static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { @@ -3074,9 +3088,9 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) -GEN_VEXT_VF(vfrsub_vf_h) -GEN_VEXT_VF(vfrsub_vf_w) -GEN_VEXT_VF(vfrsub_vf_d) +GEN_VEXT_VF(vfrsub_vf_h, 2) +GEN_VEXT_VF(vfrsub_vf_w, 4) +GEN_VEXT_VF(vfrsub_vf_d, 8) /* Vector Widening Floating-Point Add/Subtract Instructions */ static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) @@ -3094,12 +3108,12 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) -GEN_VEXT_VV_ENV(vfwadd_vv_h) -GEN_VEXT_VV_ENV(vfwadd_vv_w) +GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) +GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) -GEN_VEXT_VF(vfwadd_vf_h) -GEN_VEXT_VF(vfwadd_vf_w) +GEN_VEXT_VF(vfwadd_vf_h, 4) +GEN_VEXT_VF(vfwadd_vf_w, 8) static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { @@ -3116,12 +3130,12 @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) -GEN_VEXT_VV_ENV(vfwsub_vv_h) -GEN_VEXT_VV_ENV(vfwsub_vv_w) +GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) +GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) -GEN_VEXT_VF(vfwsub_vf_h) -GEN_VEXT_VF(vfwsub_vf_w) +GEN_VEXT_VF(vfwsub_vf_h, 4) +GEN_VEXT_VF(vfwsub_vf_w, 8) static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { @@ -3135,12 +3149,12 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) -GEN_VEXT_VV_ENV(vfwadd_wv_h) -GEN_VEXT_VV_ENV(vfwadd_wv_w) +GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) +GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) -GEN_VEXT_VF(vfwadd_wf_h) -GEN_VEXT_VF(vfwadd_wf_w) +GEN_VEXT_VF(vfwadd_wf_h, 4) +GEN_VEXT_VF(vfwadd_wf_w, 8) static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { @@ -3154,39 +3168,39 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) -GEN_VEXT_VV_ENV(vfwsub_wv_h) -GEN_VEXT_VV_ENV(vfwsub_wv_w) +GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) +GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) -GEN_VEXT_VF(vfwsub_wf_h) -GEN_VEXT_VF(vfwsub_wf_w) +GEN_VEXT_VF(vfwsub_wf_h, 4) +GEN_VEXT_VF(vfwsub_wf_w, 8) /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) -GEN_VEXT_VV_ENV(vfmul_vv_h) -GEN_VEXT_VV_ENV(vfmul_vv_w) -GEN_VEXT_VV_ENV(vfmul_vv_d) +GEN_VEXT_VV_ENV(vfmul_vv_h, 2) +GEN_VEXT_VV_ENV(vfmul_vv_w, 4) +GEN_VEXT_VV_ENV(vfmul_vv_d, 8) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) -GEN_VEXT_VF(vfmul_vf_h) -GEN_VEXT_VF(vfmul_vf_w) -GEN_VEXT_VF(vfmul_vf_d) +GEN_VEXT_VF(vfmul_vf_h, 2) +GEN_VEXT_VF(vfmul_vf_w, 4) +GEN_VEXT_VF(vfmul_vf_d, 8) RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) -GEN_VEXT_VV_ENV(vfdiv_vv_h) -GEN_VEXT_VV_ENV(vfdiv_vv_w) -GEN_VEXT_VV_ENV(vfdiv_vv_d) +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) -GEN_VEXT_VF(vfdiv_vf_h) -GEN_VEXT_VF(vfdiv_vf_w) -GEN_VEXT_VF(vfdiv_vf_d) +GEN_VEXT_VF(vfdiv_vf_h, 2) +GEN_VEXT_VF(vfdiv_vf_w, 4) +GEN_VEXT_VF(vfdiv_vf_d, 8) static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) { @@ -3206,9 +3220,9 @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) -GEN_VEXT_VF(vfrdiv_vf_h) -GEN_VEXT_VF(vfrdiv_vf_w) -GEN_VEXT_VF(vfrdiv_vf_d) +GEN_VEXT_VF(vfrdiv_vf_h, 2) +GEN_VEXT_VF(vfrdiv_vf_w, 4) +GEN_VEXT_VF(vfrdiv_vf_d, 8) /* Vector Widening Floating-Point Multiply */ static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) @@ -3225,12 +3239,12 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) -GEN_VEXT_VV_ENV(vfwmul_vv_h) -GEN_VEXT_VV_ENV(vfwmul_vv_w) +GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) -GEN_VEXT_VF(vfwmul_vf_h) -GEN_VEXT_VF(vfwmul_vf_w) +GEN_VEXT_VF(vfwmul_vf_h, 4) +GEN_VEXT_VF(vfwmul_vf_w, 8) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -3261,9 +3275,9 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) -GEN_VEXT_VV_ENV(vfmacc_vv_h) -GEN_VEXT_VV_ENV(vfmacc_vv_w) -GEN_VEXT_VV_ENV(vfmacc_vv_d) +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -3277,9 +3291,9 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) -GEN_VEXT_VF(vfmacc_vf_h) -GEN_VEXT_VF(vfmacc_vf_w) -GEN_VEXT_VF(vfmacc_vf_d) +GEN_VEXT_VF(vfmacc_vf_h, 2) +GEN_VEXT_VF(vfmacc_vf_w, 4) +GEN_VEXT_VF(vfmacc_vf_d, 8) static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3302,15 +3316,15 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) -GEN_VEXT_VV_ENV(vfnmacc_vv_h) -GEN_VEXT_VV_ENV(vfnmacc_vv_w) -GEN_VEXT_VV_ENV(vfnmacc_vv_d) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) -GEN_VEXT_VF(vfnmacc_vf_h) -GEN_VEXT_VF(vfnmacc_vf_w) -GEN_VEXT_VF(vfnmacc_vf_d) +GEN_VEXT_VF(vfnmacc_vf_h, 2) +GEN_VEXT_VF(vfnmacc_vf_w, 4) +GEN_VEXT_VF(vfnmacc_vf_d, 8) static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3330,15 +3344,15 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) -GEN_VEXT_VV_ENV(vfmsac_vv_h) -GEN_VEXT_VV_ENV(vfmsac_vv_w) -GEN_VEXT_VV_ENV(vfmsac_vv_d) +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) -GEN_VEXT_VF(vfmsac_vf_h) -GEN_VEXT_VF(vfmsac_vf_w) -GEN_VEXT_VF(vfmsac_vf_d) +GEN_VEXT_VF(vfmsac_vf_h, 2) +GEN_VEXT_VF(vfmsac_vf_w, 4) +GEN_VEXT_VF(vfmsac_vf_d, 8) static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3358,15 +3372,15 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) -GEN_VEXT_VV_ENV(vfnmsac_vv_h) -GEN_VEXT_VV_ENV(vfnmsac_vv_w) -GEN_VEXT_VV_ENV(vfnmsac_vv_d) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) -GEN_VEXT_VF(vfnmsac_vf_h) -GEN_VEXT_VF(vfnmsac_vf_w) -GEN_VEXT_VF(vfnmsac_vf_d) +GEN_VEXT_VF(vfnmsac_vf_h, 2) +GEN_VEXT_VF(vfnmsac_vf_w, 4) +GEN_VEXT_VF(vfnmsac_vf_d, 8) static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3386,15 +3400,15 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) -GEN_VEXT_VV_ENV(vfmadd_vv_h) -GEN_VEXT_VV_ENV(vfmadd_vv_w) -GEN_VEXT_VV_ENV(vfmadd_vv_d) +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) -GEN_VEXT_VF(vfmadd_vf_h) -GEN_VEXT_VF(vfmadd_vf_w) -GEN_VEXT_VF(vfmadd_vf_d) +GEN_VEXT_VF(vfmadd_vf_h, 2) +GEN_VEXT_VF(vfmadd_vf_w, 4) +GEN_VEXT_VF(vfmadd_vf_d, 8) static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3417,15 +3431,15 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) -GEN_VEXT_VV_ENV(vfnmadd_vv_h) -GEN_VEXT_VV_ENV(vfnmadd_vv_w) -GEN_VEXT_VV_ENV(vfnmadd_vv_d) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) -GEN_VEXT_VF(vfnmadd_vf_h) -GEN_VEXT_VF(vfnmadd_vf_w) -GEN_VEXT_VF(vfnmadd_vf_d) +GEN_VEXT_VF(vfnmadd_vf_h, 2) +GEN_VEXT_VF(vfnmadd_vf_w, 4) +GEN_VEXT_VF(vfnmadd_vf_d, 8) static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3445,15 +3459,15 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) -GEN_VEXT_VV_ENV(vfmsub_vv_h) -GEN_VEXT_VV_ENV(vfmsub_vv_w) -GEN_VEXT_VV_ENV(vfmsub_vv_d) +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) -GEN_VEXT_VF(vfmsub_vf_h) -GEN_VEXT_VF(vfmsub_vf_w) -GEN_VEXT_VF(vfmsub_vf_d) +GEN_VEXT_VF(vfmsub_vf_h, 2) +GEN_VEXT_VF(vfmsub_vf_w, 4) +GEN_VEXT_VF(vfmsub_vf_d, 8) static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -3473,15 +3487,15 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) -GEN_VEXT_VV_ENV(vfnmsub_vv_h) -GEN_VEXT_VV_ENV(vfnmsub_vv_w) -GEN_VEXT_VV_ENV(vfnmsub_vv_d) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) -GEN_VEXT_VF(vfnmsub_vf_h) -GEN_VEXT_VF(vfnmsub_vf_w) -GEN_VEXT_VF(vfnmsub_vf_d) +GEN_VEXT_VF(vfnmsub_vf_h, 2) +GEN_VEXT_VF(vfnmsub_vf_w, 4) +GEN_VEXT_VF(vfnmsub_vf_d, 8) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) @@ -3498,12 +3512,12 @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) -GEN_VEXT_VV_ENV(vfwmacc_vv_h) -GEN_VEXT_VV_ENV(vfwmacc_vv_w) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) -GEN_VEXT_VF(vfwmacc_vf_h) -GEN_VEXT_VF(vfwmacc_vf_w) +GEN_VEXT_VF(vfwmacc_vf_h, 4) +GEN_VEXT_VF(vfwmacc_vf_w, 8) static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -3521,12 +3535,12 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) -GEN_VEXT_VV_ENV(vfwnmacc_vv_h) -GEN_VEXT_VV_ENV(vfwnmacc_vv_w) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) -GEN_VEXT_VF(vfwnmacc_vf_h) -GEN_VEXT_VF(vfwnmacc_vf_w) +GEN_VEXT_VF(vfwnmacc_vf_h, 4) +GEN_VEXT_VF(vfwnmacc_vf_w, 8) static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -3544,12 +3558,12 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) -GEN_VEXT_VV_ENV(vfwmsac_vv_h) -GEN_VEXT_VV_ENV(vfwmsac_vv_w) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) -GEN_VEXT_VF(vfwmsac_vf_h) -GEN_VEXT_VF(vfwmsac_vf_w) +GEN_VEXT_VF(vfwmsac_vf_h, 4) +GEN_VEXT_VF(vfwmsac_vf_w, 8) static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -3567,12 +3581,12 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) -GEN_VEXT_VV_ENV(vfwnmsac_vv_h) -GEN_VEXT_VV_ENV(vfwnmsac_vv_w) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) -GEN_VEXT_VF(vfwnmsac_vf_h) -GEN_VEXT_VF(vfwnmsac_vf_w) +GEN_VEXT_VF(vfwnmsac_vf_h, 4) +GEN_VEXT_VF(vfwnmsac_vf_w, 8) /* Vector Floating-Point Square-Root Instruction */ /* (TD, T2, TX2) */ @@ -3588,12 +3602,15 @@ static void do_##NAME(void *vd, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ } -#define GEN_VEXT_V_ENV(NAME) \ +#define GEN_VEXT_V_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ if (vl == 0) { \ @@ -3606,14 +3623,19 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ do_##NAME(vd, vs2, i, env); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + total_elems * \ + ESZ); \ } RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) -GEN_VEXT_V_ENV(vfsqrt_v_h) -GEN_VEXT_V_ENV(vfsqrt_v_w) -GEN_VEXT_V_ENV(vfsqrt_v_d) +GEN_VEXT_V_ENV(vfsqrt_v_h, 2) +GEN_VEXT_V_ENV(vfsqrt_v_w, 4) +GEN_VEXT_V_ENV(vfsqrt_v_d, 8) /* * Vector Floating-Point Reciprocal Square-Root Estimate Instruction @@ -3793,9 +3815,9 @@ static float64 frsqrt7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) -GEN_VEXT_V_ENV(vfrsqrt7_v_h) -GEN_VEXT_V_ENV(vfrsqrt7_v_w) -GEN_VEXT_V_ENV(vfrsqrt7_v_d) +GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) +GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) +GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) /* * Vector Floating-Point Reciprocal Estimate Instruction @@ -3984,36 +4006,36 @@ static float64 frec7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) -GEN_VEXT_V_ENV(vfrec7_v_h) -GEN_VEXT_V_ENV(vfrec7_v_w) -GEN_VEXT_V_ENV(vfrec7_v_d) +GEN_VEXT_V_ENV(vfrec7_v_h, 2) +GEN_VEXT_V_ENV(vfrec7_v_w, 4) +GEN_VEXT_V_ENV(vfrec7_v_d, 8) /* Vector Floating-Point MIN/MAX Instructions */ RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) -GEN_VEXT_VV_ENV(vfmin_vv_h) -GEN_VEXT_VV_ENV(vfmin_vv_w) -GEN_VEXT_VV_ENV(vfmin_vv_d) +GEN_VEXT_VV_ENV(vfmin_vv_h, 2) +GEN_VEXT_VV_ENV(vfmin_vv_w, 4) +GEN_VEXT_VV_ENV(vfmin_vv_d, 8) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) -GEN_VEXT_VF(vfmin_vf_h) -GEN_VEXT_VF(vfmin_vf_w) -GEN_VEXT_VF(vfmin_vf_d) +GEN_VEXT_VF(vfmin_vf_h, 2) +GEN_VEXT_VF(vfmin_vf_w, 4) +GEN_VEXT_VF(vfmin_vf_d, 8) RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) -GEN_VEXT_VV_ENV(vfmax_vv_h) -GEN_VEXT_VV_ENV(vfmax_vv_w) -GEN_VEXT_VV_ENV(vfmax_vv_d) +GEN_VEXT_VV_ENV(vfmax_vv_h, 2) +GEN_VEXT_VV_ENV(vfmax_vv_w, 4) +GEN_VEXT_VV_ENV(vfmax_vv_d, 8) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) -GEN_VEXT_VF(vfmax_vf_h) -GEN_VEXT_VF(vfmax_vf_w) -GEN_VEXT_VF(vfmax_vf_d) +GEN_VEXT_VF(vfmax_vf_h, 2) +GEN_VEXT_VF(vfmax_vf_w, 4) +GEN_VEXT_VF(vfmax_vf_d, 8) /* Vector Floating-Point Sign-Injection Instructions */ static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) @@ -4034,15 +4056,15 @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) -GEN_VEXT_VV_ENV(vfsgnj_vv_h) -GEN_VEXT_VV_ENV(vfsgnj_vv_w) -GEN_VEXT_VV_ENV(vfsgnj_vv_d) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) -GEN_VEXT_VF(vfsgnj_vf_h) -GEN_VEXT_VF(vfsgnj_vf_w) -GEN_VEXT_VF(vfsgnj_vf_d) +GEN_VEXT_VF(vfsgnj_vf_h, 2) +GEN_VEXT_VF(vfsgnj_vf_w, 4) +GEN_VEXT_VF(vfsgnj_vf_d, 8) static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) { @@ -4062,15 +4084,15 @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) -GEN_VEXT_VV_ENV(vfsgnjn_vv_h) -GEN_VEXT_VV_ENV(vfsgnjn_vv_w) -GEN_VEXT_VV_ENV(vfsgnjn_vv_d) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) -GEN_VEXT_VF(vfsgnjn_vf_h) -GEN_VEXT_VF(vfsgnjn_vf_w) -GEN_VEXT_VF(vfsgnjn_vf_d) +GEN_VEXT_VF(vfsgnjn_vf_h, 2) +GEN_VEXT_VF(vfsgnjn_vf_w, 4) +GEN_VEXT_VF(vfsgnjn_vf_d, 8) static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) { @@ -4090,15 +4112,15 @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) -GEN_VEXT_VV_ENV(vfsgnjx_vv_h) -GEN_VEXT_VV_ENV(vfsgnjx_vv_w) -GEN_VEXT_VV_ENV(vfsgnjx_vv_d) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) -GEN_VEXT_VF(vfsgnjx_vf_h) -GEN_VEXT_VF(vfsgnjx_vf_w) -GEN_VEXT_VF(vfsgnjx_vf_d) +GEN_VEXT_VF(vfsgnjx_vf_h, 2) +GEN_VEXT_VF(vfsgnjx_vf_w, 4) +GEN_VEXT_VF(vfsgnjx_vf_d, 8) /* Vector Floating-Point Compare Instructions */ #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ @@ -4107,6 +4129,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -4119,6 +4144,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ DO_OP(s2, s1, &env->fp_status)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) @@ -4131,6 +4162,9 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -4142,6 +4176,12 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) @@ -4242,12 +4282,15 @@ static void do_##NAME(void *vd, void *vs2, int i) \ *((TD *)vd + HD(i)) = OP(s2); \ } -#define GEN_VEXT_V(NAME) \ +#define GEN_VEXT_V(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -4257,6 +4300,11 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ do_##NAME(vd, vs2, i); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + total_elems * \ + ESZ); \ } target_ulong fclass_h(uint64_t frs1) @@ -4319,17 +4367,21 @@ target_ulong fclass_d(uint64_t frs1) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) -GEN_VEXT_V(vfclass_v_h) -GEN_VEXT_V(vfclass_v_w) -GEN_VEXT_V(vfclass_v_d) +GEN_VEXT_V(vfclass_v_h, 2) +GEN_VEXT_V(vfclass_v_w, 4) +GEN_VEXT_V(vfclass_v_d, 8) /* Vector Floating-Point Merge Instruction */ + #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -4338,6 +4390,9 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) @@ -4349,33 +4404,33 @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) -GEN_VEXT_V_ENV(vfcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfcvt_x_f_v_w) -GEN_VEXT_V_ENV(vfcvt_x_f_v_d) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfcvt_f_x_v_w) -GEN_VEXT_V_ENV(vfcvt_f_x_v_d) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) /* Widening Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4385,30 +4440,30 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d) /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) /* * vfwcvt.f.f.v vd, vs2, vm @@ -4421,8 +4476,8 @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) /* Narrowing Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4433,29 +4488,29 @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) -GEN_VEXT_V_ENV(vfncvt_x_f_w_b) -GEN_VEXT_V_ENV(vfncvt_x_f_w_h) -GEN_VEXT_V_ENV(vfncvt_x_f_w_w) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_x_w_h) -GEN_VEXT_V_ENV(vfncvt_f_x_w_w) +GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ static uint16_t vfncvtffv16(uint32_t a, float_status *s) @@ -4465,8 +4520,8 @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_f_w_h) -GEN_VEXT_V_ENV(vfncvt_f_f_w_w) +GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) /* *** Vector Reduction Operations From patchwork Mon Mar 7 12:21:37 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611082 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT3655BY9z9sFk for ; Wed, 30 Mar 2022 21:47:09 +1100 (AEDT) Received: from localhost ([::1]:48652 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVr1-0006ZG-No for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:47:07 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44624) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVn-0005G4-4A; Wed, 30 Mar 2022 06:25:12 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37038) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVe-00076Q-Gj; Wed, 30 Mar 2022 06:25:05 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 92DA411F090; Wed, 30 Mar 2022 10:24:36 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 04:21:37 -0800 Subject: [PATCH qemu v7 12/14] target/riscv: rvv: Add tail agnostic for vector reduction instructions Message-ID: <164863587444.17401.9965527486691250478-12@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index a85a0c667e..7884314779 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4533,6 +4533,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlenb = simd_maxsz(desc); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ TD s1 = *((TD *)vs1 + HD(0)); \ \ @@ -4545,6 +4548,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ } \ *((TD *)vd + HD(0)) = s1; \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, \ + vlenb); \ } /* vd[0] = sum(vs1[0], vs2[*]) */ @@ -4614,6 +4620,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlenb = simd_maxsz(desc); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ TD s1 = *((TD *)vs1 + HD(0)); \ \ @@ -4626,6 +4635,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ } \ *((TD *)vd + HD(0)) = s1; \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, \ + vlenb); \ } /* Unordered sum */ @@ -4650,6 +4662,9 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t esz = sizeof(uint32_t); + uint32_t vlenb = simd_maxsz(desc); + uint32_t vta = vext_vta(desc); uint32_t i; uint32_t s1 = *((uint32_t *)vs1 + H4(0)); @@ -4663,6 +4678,8 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, } *((uint32_t *)vd + H4(0)) = s1; env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, vlenb); } void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, @@ -4670,6 +4687,9 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t esz = sizeof(uint64_t); + uint32_t vlenb = simd_maxsz(desc); + uint32_t vta = vext_vta(desc); uint32_t i; uint64_t s1 = *((uint64_t *)vs1); @@ -4683,6 +4703,8 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, } *((uint64_t *)vd) = s1; env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, vlenb); } /* From patchwork Mon Mar 7 15:26:05 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611094 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT3J15mNrz9sFk for ; Wed, 30 Mar 2022 21:55:45 +1100 (AEDT) Received: from localhost ([::1]:37352 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVzL-0001pV-SP for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:55:43 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44660) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVn-0005GG-6p; Wed, 30 Mar 2022 06:25:12 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37040) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVe-00076b-Rn; Wed, 30 Mar 2022 06:25:07 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id B72FD11F09F; Wed, 30 Mar 2022 10:24:36 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 07:26:05 -0800 Subject: [PATCH qemu v7 13/14] target/riscv: rvv: Add tail agnostic for vector mask instructions Message-ID: <164863587444.17401.9965527486691250478-13@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 4 ++++ target/riscv/vector_helper.c | 28 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 8abe80d3da..92f6dc5c76 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3199,6 +3199,7 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -3303,6 +3304,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ cpu_env, s->cfg_ptr->vlen / 8, \ @@ -3340,6 +3342,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_3_ptr * const fns[4] = { gen_helper_viota_m_b, gen_helper_viota_m_h, gen_helper_viota_m_w, gen_helper_viota_m_d, @@ -3369,6 +3372,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] = { gen_helper_vid_v_b, gen_helper_vid_v_h, gen_helper_vid_v_w, gen_helper_vid_v_d, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 7884314779..cc5cc324a4 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4717,6 +4717,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ int a, b; \ \ @@ -4726,6 +4728,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ vext_set_elem_mask(vd, i, OP(b, a)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } #define DO_NAND(N, M) (!(N & M)) @@ -4793,6 +4801,8 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = env_archcpu(env)->cfg.vlen; + uint32_t vta = vext_vta(desc); int i; bool first_mask_bit = false; @@ -4821,6 +4831,12 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, } } env->vstart = 0; + /* set tail elements to 1s */ + if (vta) { + for (; i < total_elems; i++) { + vext_set_elem_mask(vd, i, 1); + } + } } void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, @@ -4848,6 +4864,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t sum = 0; \ int i; \ \ @@ -4861,6 +4880,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) @@ -4874,6 +4896,9 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ int i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -4883,6 +4908,9 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ *((ETYPE *)vd + H(i)) = i; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) From patchwork Mon Mar 7 15:59:26 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: ~eopxd X-Patchwork-Id: 1611086 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4KT3Dw0WCBz9sFk for ; Wed, 30 Mar 2022 21:53:04 +1100 (AEDT) Received: from localhost ([::1]:59116 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nZVwk-0005w0-4N for incoming@patchwork.ozlabs.org; Wed, 30 Mar 2022 06:53:02 -0400 Received: from eggs.gnu.org ([209.51.188.92]:44572) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVm-0005Fe-D6; Wed, 30 Mar 2022 06:25:10 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:37042) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nZVVe-00076c-Qs; Wed, 30 Mar 2022 06:25:07 -0400 Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id E23FC11F0A5; Wed, 30 Mar 2022 10:24:36 +0000 (UTC) From: ~eopxd Date: Mon, 07 Mar 2022 07:59:26 -0800 Subject: [PATCH qemu v7 14/14] target/riscv: rvv: Add tail agnostic for vector permutation instructions Message-ID: <164863587444.17401.9965527486691250478-14@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164863587444.17401.9965527486691250478-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org MIME-Version: 1.0 Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 22 ++++++++++++ target/riscv/vector_helper.c | 48 +++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 92f6dc5c76..8d87501e03 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3732,6 +3732,16 @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) } if (a->vm && s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } int scale = s->lmul - (s->sew + 3); int vlmax = scale < 0 ? s->cfg_ptr->vlen >> -scale : s->cfg_ptr->vlen << scale; @@ -3765,6 +3775,16 @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) } if (a->vm && s->vl_eq_vlmax) { + if (s->vta && s->lmul < 0) { + /* + * tail elements may pass vlmax when lmul < 0 + * set tail elements to 1s + */ + uint32_t vlenb = s->cfg_ptr->vlen >> 3; + tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rd), -1, + vlenb, vlenb); + } int scale = s->lmul - (s->sew + 3); int vlmax = scale < 0 ? s->cfg_ptr->vlen >> -scale : s->cfg_ptr->vlen << scale; @@ -3817,6 +3837,7 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r *a) tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -3922,6 +3943,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) } data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index cc5cc324a4..fbde0c9248 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4929,6 +4929,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ target_ulong offset = s1, i_min, i; \ \ i_min = MAX(env->vstart, offset); \ @@ -4938,6 +4941,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ } \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ @@ -4953,6 +4959,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ target_ulong i_max, i; \ \ i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \ @@ -4969,6 +4978,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ @@ -4984,6 +4996,9 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -4997,6 +5012,9 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VSLIE1UP(8, H1) @@ -5024,6 +5042,9 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -5037,6 +5058,9 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_VSLIDE1DOWN(8, H1) @@ -5090,6 +5114,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS2); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint64_t index; \ uint32_t i; \ \ @@ -5105,6 +5132,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ @@ -5125,6 +5155,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint64_t index = s1; \ uint32_t i; \ \ @@ -5139,6 +5172,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ @@ -5153,6 +5189,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t num = 0, i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -5163,6 +5202,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ num++; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } /* Compress into vd elements of vs2 where vs1 is enabled */ @@ -5199,6 +5241,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -5208,6 +5253,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + total_elems * esz); \ } GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)