From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647694907006394.59007971184633; Sat, 19 Mar 2022 06:01:47 -0700 (PDT) Received: from localhost ([::1]:56894 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYiH-00010c-H1 for importer@patchew.org; Sat, 19 Mar 2022 09:01:45 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57576) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXf-0004oR-6T; Sat, 19 Mar 2022 08:50:47 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:35984) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXa-0003XQ-S9; Sat, 19 Mar 2022 08:50:46 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id B1F8E11EF2F; Sat, 19 Mar 2022 12:50:40 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 14 Mar 2022 00:38:00 -0700 Subject: [PATCH qemu 01/13] target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed Message-ID: <164769423983.18409.14760549429989700286-1@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass 
client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647694908130100001 From: eopXD No functional change intended in this commit. Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 1132 +++++++++++++++++----------------- 1 file changed, 565 insertions(+), 567 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 3bd4aac9c9..e94caf1a3c 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -710,7 +710,6 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) =20 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - uint32_t esz, uint32_t dsz, opivv2_fn *fn) { uint32_t vm =3D vext_vm(desc); @@ -727,23 +726,23 @@ static void do_vext_vv(void *vd, void *v0, void *vs1,= void *vs2, } =20 /* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ do_##NAME); \ } =20 -GEN_VEXT_VV(vadd_vv_b, 1, 1) -GEN_VEXT_VV(vadd_vv_h, 2, 2) -GEN_VEXT_VV(vadd_vv_w, 4, 4) -GEN_VEXT_VV(vadd_vv_d, 8, 8) -GEN_VEXT_VV(vsub_vv_b, 1, 1) -GEN_VEXT_VV(vsub_vv_h, 2, 
2) -GEN_VEXT_VV(vsub_vv_w, 4, 4) -GEN_VEXT_VV(vsub_vv_d, 8, 8) +GEN_VEXT_VV(vadd_vv_b) +GEN_VEXT_VV(vadd_vv_h) +GEN_VEXT_VV(vadd_vv_w) +GEN_VEXT_VV(vadd_vv_d) +GEN_VEXT_VV(vsub_vv_b) +GEN_VEXT_VV(vsub_vv_h) +GEN_VEXT_VV(vsub_vv_w) +GEN_VEXT_VV(vsub_vv_d) =20 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); =20 @@ -773,7 +772,6 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) =20 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - uint32_t esz, uint32_t dsz, opivx2_fn fn) { uint32_t vm =3D vext_vm(desc); @@ -790,27 +788,27 @@ static void do_vext_vx(void *vd, void *v0, target_lon= g s1, void *vs2, } =20 /* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VX(NAME) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_vext_vx(vd, v0, s1, vs2, env, desc, \ do_##NAME); \ } =20 -GEN_VEXT_VX(vadd_vx_b, 1, 1) -GEN_VEXT_VX(vadd_vx_h, 2, 2) -GEN_VEXT_VX(vadd_vx_w, 4, 4) -GEN_VEXT_VX(vadd_vx_d, 8, 8) -GEN_VEXT_VX(vsub_vx_b, 1, 1) -GEN_VEXT_VX(vsub_vx_h, 2, 2) -GEN_VEXT_VX(vsub_vx_w, 4, 4) -GEN_VEXT_VX(vsub_vx_d, 8, 8) -GEN_VEXT_VX(vrsub_vx_b, 1, 1) -GEN_VEXT_VX(vrsub_vx_h, 2, 2) -GEN_VEXT_VX(vrsub_vx_w, 4, 4) -GEN_VEXT_VX(vrsub_vx_d, 8, 8) +GEN_VEXT_VX(vadd_vx_b) +GEN_VEXT_VX(vadd_vx_h) +GEN_VEXT_VX(vadd_vx_w) +GEN_VEXT_VX(vadd_vx_d) +GEN_VEXT_VX(vsub_vx_b) +GEN_VEXT_VX(vsub_vx_h) +GEN_VEXT_VX(vsub_vx_w) +GEN_VEXT_VX(vsub_vx_d) +GEN_VEXT_VX(vrsub_vx_b) +GEN_VEXT_VX(vrsub_vx_h) +GEN_VEXT_VX(vrsub_vx_w) +GEN_VEXT_VX(vrsub_vx_d) =20 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) { @@ -889,30 +887,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, D= O_ADD) RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, 
H8, H4, H4, DO_SUB) -GEN_VEXT_VV(vwaddu_vv_b, 1, 2) -GEN_VEXT_VV(vwaddu_vv_h, 2, 4) -GEN_VEXT_VV(vwaddu_vv_w, 4, 8) -GEN_VEXT_VV(vwsubu_vv_b, 1, 2) -GEN_VEXT_VV(vwsubu_vv_h, 2, 4) -GEN_VEXT_VV(vwsubu_vv_w, 4, 8) -GEN_VEXT_VV(vwadd_vv_b, 1, 2) -GEN_VEXT_VV(vwadd_vv_h, 2, 4) -GEN_VEXT_VV(vwadd_vv_w, 4, 8) -GEN_VEXT_VV(vwsub_vv_b, 1, 2) -GEN_VEXT_VV(vwsub_vv_h, 2, 4) -GEN_VEXT_VV(vwsub_vv_w, 4, 8) -GEN_VEXT_VV(vwaddu_wv_b, 1, 2) -GEN_VEXT_VV(vwaddu_wv_h, 2, 4) -GEN_VEXT_VV(vwaddu_wv_w, 4, 8) -GEN_VEXT_VV(vwsubu_wv_b, 1, 2) -GEN_VEXT_VV(vwsubu_wv_h, 2, 4) -GEN_VEXT_VV(vwsubu_wv_w, 4, 8) -GEN_VEXT_VV(vwadd_wv_b, 1, 2) -GEN_VEXT_VV(vwadd_wv_h, 2, 4) -GEN_VEXT_VV(vwadd_wv_w, 4, 8) -GEN_VEXT_VV(vwsub_wv_b, 1, 2) -GEN_VEXT_VV(vwsub_wv_h, 2, 4) -GEN_VEXT_VV(vwsub_wv_w, 4, 8) +GEN_VEXT_VV(vwaddu_vv_b) +GEN_VEXT_VV(vwaddu_vv_h) +GEN_VEXT_VV(vwaddu_vv_w) +GEN_VEXT_VV(vwsubu_vv_b) +GEN_VEXT_VV(vwsubu_vv_h) +GEN_VEXT_VV(vwsubu_vv_w) +GEN_VEXT_VV(vwadd_vv_b) +GEN_VEXT_VV(vwadd_vv_h) +GEN_VEXT_VV(vwadd_vv_w) +GEN_VEXT_VV(vwsub_vv_b) +GEN_VEXT_VV(vwsub_vv_h) +GEN_VEXT_VV(vwsub_vv_w) +GEN_VEXT_VV(vwaddu_wv_b) +GEN_VEXT_VV(vwaddu_wv_h) +GEN_VEXT_VV(vwaddu_wv_w) +GEN_VEXT_VV(vwsubu_wv_b) +GEN_VEXT_VV(vwsubu_wv_h) +GEN_VEXT_VV(vwsubu_wv_w) +GEN_VEXT_VV(vwadd_wv_b) +GEN_VEXT_VV(vwadd_wv_h) +GEN_VEXT_VV(vwadd_wv_w) +GEN_VEXT_VV(vwsub_wv_b) +GEN_VEXT_VV(vwsub_wv_h) +GEN_VEXT_VV(vwsub_wv_w) =20 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) @@ -938,30 +936,30 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_AD= D) RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) -GEN_VEXT_VX(vwaddu_vx_b, 1, 2) -GEN_VEXT_VX(vwaddu_vx_h, 2, 4) -GEN_VEXT_VX(vwaddu_vx_w, 4, 8) -GEN_VEXT_VX(vwsubu_vx_b, 1, 2) -GEN_VEXT_VX(vwsubu_vx_h, 2, 4) -GEN_VEXT_VX(vwsubu_vx_w, 4, 8) -GEN_VEXT_VX(vwadd_vx_b, 1, 2) -GEN_VEXT_VX(vwadd_vx_h, 
2, 4) -GEN_VEXT_VX(vwadd_vx_w, 4, 8) -GEN_VEXT_VX(vwsub_vx_b, 1, 2) -GEN_VEXT_VX(vwsub_vx_h, 2, 4) -GEN_VEXT_VX(vwsub_vx_w, 4, 8) -GEN_VEXT_VX(vwaddu_wx_b, 1, 2) -GEN_VEXT_VX(vwaddu_wx_h, 2, 4) -GEN_VEXT_VX(vwaddu_wx_w, 4, 8) -GEN_VEXT_VX(vwsubu_wx_b, 1, 2) -GEN_VEXT_VX(vwsubu_wx_h, 2, 4) -GEN_VEXT_VX(vwsubu_wx_w, 4, 8) -GEN_VEXT_VX(vwadd_wx_b, 1, 2) -GEN_VEXT_VX(vwadd_wx_h, 2, 4) -GEN_VEXT_VX(vwadd_wx_w, 4, 8) -GEN_VEXT_VX(vwsub_wx_b, 1, 2) -GEN_VEXT_VX(vwsub_wx_h, 2, 4) -GEN_VEXT_VX(vwsub_wx_w, 4, 8) +GEN_VEXT_VX(vwaddu_vx_b) +GEN_VEXT_VX(vwaddu_vx_h) +GEN_VEXT_VX(vwaddu_vx_w) +GEN_VEXT_VX(vwsubu_vx_b) +GEN_VEXT_VX(vwsubu_vx_h) +GEN_VEXT_VX(vwsubu_vx_w) +GEN_VEXT_VX(vwadd_vx_b) +GEN_VEXT_VX(vwadd_vx_h) +GEN_VEXT_VX(vwadd_vx_w) +GEN_VEXT_VX(vwsub_vx_b) +GEN_VEXT_VX(vwsub_vx_h) +GEN_VEXT_VX(vwsub_vx_w) +GEN_VEXT_VX(vwaddu_wx_b) +GEN_VEXT_VX(vwaddu_wx_h) +GEN_VEXT_VX(vwaddu_wx_w) +GEN_VEXT_VX(vwsubu_wx_b) +GEN_VEXT_VX(vwsubu_wx_h) +GEN_VEXT_VX(vwsubu_wx_w) +GEN_VEXT_VX(vwadd_wx_b) +GEN_VEXT_VX(vwadd_wx_h) +GEN_VEXT_VX(vwadd_wx_w) +GEN_VEXT_VX(vwsub_wx_b) +GEN_VEXT_VX(vwsub_wx_h) +GEN_VEXT_VX(vwsub_wx_w) =20 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ #define DO_VADC(N, M, C) (N + M + C) @@ -1091,18 +1089,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO= _XOR) RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) -GEN_VEXT_VV(vand_vv_b, 1, 1) -GEN_VEXT_VV(vand_vv_h, 2, 2) -GEN_VEXT_VV(vand_vv_w, 4, 4) -GEN_VEXT_VV(vand_vv_d, 8, 8) -GEN_VEXT_VV(vor_vv_b, 1, 1) -GEN_VEXT_VV(vor_vv_h, 2, 2) -GEN_VEXT_VV(vor_vv_w, 4, 4) -GEN_VEXT_VV(vor_vv_d, 8, 8) -GEN_VEXT_VV(vxor_vv_b, 1, 1) -GEN_VEXT_VV(vxor_vv_h, 2, 2) -GEN_VEXT_VV(vxor_vv_w, 4, 4) -GEN_VEXT_VV(vxor_vv_d, 8, 8) +GEN_VEXT_VV(vand_vv_b) +GEN_VEXT_VV(vand_vv_h) +GEN_VEXT_VV(vand_vv_w) +GEN_VEXT_VV(vand_vv_d) +GEN_VEXT_VV(vor_vv_b) +GEN_VEXT_VV(vor_vv_h) 
+GEN_VEXT_VV(vor_vv_w) +GEN_VEXT_VV(vor_vv_d) +GEN_VEXT_VV(vxor_vv_b) +GEN_VEXT_VV(vxor_vv_h) +GEN_VEXT_VV(vxor_vv_w) +GEN_VEXT_VV(vxor_vv_d) =20 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) @@ -1116,18 +1114,18 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) -GEN_VEXT_VX(vand_vx_b, 1, 1) -GEN_VEXT_VX(vand_vx_h, 2, 2) -GEN_VEXT_VX(vand_vx_w, 4, 4) -GEN_VEXT_VX(vand_vx_d, 8, 8) -GEN_VEXT_VX(vor_vx_b, 1, 1) -GEN_VEXT_VX(vor_vx_h, 2, 2) -GEN_VEXT_VX(vor_vx_w, 4, 4) -GEN_VEXT_VX(vor_vx_d, 8, 8) -GEN_VEXT_VX(vxor_vx_b, 1, 1) -GEN_VEXT_VX(vxor_vx_h, 2, 2) -GEN_VEXT_VX(vxor_vx_w, 4, 4) -GEN_VEXT_VX(vxor_vx_d, 8, 8) +GEN_VEXT_VX(vand_vx_b) +GEN_VEXT_VX(vand_vx_h) +GEN_VEXT_VX(vand_vx_w) +GEN_VEXT_VX(vand_vx_d) +GEN_VEXT_VX(vor_vx_b) +GEN_VEXT_VX(vor_vx_h) +GEN_VEXT_VX(vor_vx_w) +GEN_VEXT_VX(vor_vx_d) +GEN_VEXT_VX(vxor_vx_b) +GEN_VEXT_VX(vxor_vx_h) +GEN_VEXT_VX(vxor_vx_w) +GEN_VEXT_VX(vxor_vx_d) =20 /* Vector Single-Width Bit Shift Instructions */ #define DO_SLL(N, M) (N << (M)) @@ -1348,22 +1346,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO= _MAX) RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) -GEN_VEXT_VV(vminu_vv_b, 1, 1) -GEN_VEXT_VV(vminu_vv_h, 2, 2) -GEN_VEXT_VV(vminu_vv_w, 4, 4) -GEN_VEXT_VV(vminu_vv_d, 8, 8) -GEN_VEXT_VV(vmin_vv_b, 1, 1) -GEN_VEXT_VV(vmin_vv_h, 2, 2) -GEN_VEXT_VV(vmin_vv_w, 4, 4) -GEN_VEXT_VV(vmin_vv_d, 8, 8) -GEN_VEXT_VV(vmaxu_vv_b, 1, 1) -GEN_VEXT_VV(vmaxu_vv_h, 2, 2) -GEN_VEXT_VV(vmaxu_vv_w, 4, 4) -GEN_VEXT_VV(vmaxu_vv_d, 8, 8) -GEN_VEXT_VV(vmax_vv_b, 1, 1) -GEN_VEXT_VV(vmax_vv_h, 2, 2) -GEN_VEXT_VV(vmax_vv_w, 4, 4) -GEN_VEXT_VV(vmax_vv_d, 8, 8) +GEN_VEXT_VV(vminu_vv_b) 
+GEN_VEXT_VV(vminu_vv_h) +GEN_VEXT_VV(vminu_vv_w) +GEN_VEXT_VV(vminu_vv_d) +GEN_VEXT_VV(vmin_vv_b) +GEN_VEXT_VV(vmin_vv_h) +GEN_VEXT_VV(vmin_vv_w) +GEN_VEXT_VV(vmin_vv_d) +GEN_VEXT_VV(vmaxu_vv_b) +GEN_VEXT_VV(vmaxu_vv_h) +GEN_VEXT_VV(vmaxu_vv_w) +GEN_VEXT_VV(vmaxu_vv_d) +GEN_VEXT_VV(vmax_vv_b) +GEN_VEXT_VV(vmax_vv_h) +GEN_VEXT_VV(vmax_vv_w) +GEN_VEXT_VV(vmax_vv_d) =20 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) @@ -1381,22 +1379,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) -GEN_VEXT_VX(vminu_vx_b, 1, 1) -GEN_VEXT_VX(vminu_vx_h, 2, 2) -GEN_VEXT_VX(vminu_vx_w, 4, 4) -GEN_VEXT_VX(vminu_vx_d, 8, 8) -GEN_VEXT_VX(vmin_vx_b, 1, 1) -GEN_VEXT_VX(vmin_vx_h, 2, 2) -GEN_VEXT_VX(vmin_vx_w, 4, 4) -GEN_VEXT_VX(vmin_vx_d, 8, 8) -GEN_VEXT_VX(vmaxu_vx_b, 1, 1) -GEN_VEXT_VX(vmaxu_vx_h, 2, 2) -GEN_VEXT_VX(vmaxu_vx_w, 4, 4) -GEN_VEXT_VX(vmaxu_vx_d, 8, 8) -GEN_VEXT_VX(vmax_vx_b, 1, 1) -GEN_VEXT_VX(vmax_vx_h, 2, 2) -GEN_VEXT_VX(vmax_vx_w, 4, 4) -GEN_VEXT_VX(vmax_vx_d, 8, 8) +GEN_VEXT_VX(vminu_vx_b) +GEN_VEXT_VX(vminu_vx_h) +GEN_VEXT_VX(vminu_vx_w) +GEN_VEXT_VX(vminu_vx_d) +GEN_VEXT_VX(vmin_vx_b) +GEN_VEXT_VX(vmin_vx_h) +GEN_VEXT_VX(vmin_vx_w) +GEN_VEXT_VX(vmin_vx_d) +GEN_VEXT_VX(vmaxu_vx_b) +GEN_VEXT_VX(vmaxu_vx_h) +GEN_VEXT_VX(vmaxu_vx_w) +GEN_VEXT_VX(vmaxu_vx_d) +GEN_VEXT_VX(vmax_vx_b) +GEN_VEXT_VX(vmax_vx_h) +GEN_VEXT_VX(vmax_vx_w) +GEN_VEXT_VX(vmax_vx_d) =20 /* Vector Single-Width Integer Multiply Instructions */ #define DO_MUL(N, M) (N * M) @@ -1404,10 +1402,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO= _MUL) RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) -GEN_VEXT_VV(vmul_vv_b, 1, 1) -GEN_VEXT_VV(vmul_vv_h, 2, 2) 
-GEN_VEXT_VV(vmul_vv_w, 4, 4) -GEN_VEXT_VV(vmul_vv_d, 8, 8) +GEN_VEXT_VV(vmul_vv_b) +GEN_VEXT_VV(vmul_vv_h) +GEN_VEXT_VV(vmul_vv_w) +GEN_VEXT_VV(vmul_vv_d) =20 static int8_t do_mulh_b(int8_t s2, int8_t s1) { @@ -1511,18 +1509,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1,= do_mulhsu_b) RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) -GEN_VEXT_VV(vmulh_vv_b, 1, 1) -GEN_VEXT_VV(vmulh_vv_h, 2, 2) -GEN_VEXT_VV(vmulh_vv_w, 4, 4) -GEN_VEXT_VV(vmulh_vv_d, 8, 8) -GEN_VEXT_VV(vmulhu_vv_b, 1, 1) -GEN_VEXT_VV(vmulhu_vv_h, 2, 2) -GEN_VEXT_VV(vmulhu_vv_w, 4, 4) -GEN_VEXT_VV(vmulhu_vv_d, 8, 8) -GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) -GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) -GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) -GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) +GEN_VEXT_VV(vmulh_vv_b) +GEN_VEXT_VV(vmulh_vv_h) +GEN_VEXT_VV(vmulh_vv_w) +GEN_VEXT_VV(vmulh_vv_d) +GEN_VEXT_VV(vmulhu_vv_b) +GEN_VEXT_VV(vmulhu_vv_h) +GEN_VEXT_VV(vmulhu_vv_w) +GEN_VEXT_VV(vmulhu_vv_d) +GEN_VEXT_VV(vmulhsu_vv_b) +GEN_VEXT_VV(vmulhsu_vv_h) +GEN_VEXT_VV(vmulhsu_vv_w) +GEN_VEXT_VV(vmulhsu_vv_d) =20 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) @@ -1540,22 +1538,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_= mulhsu_b) RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) -GEN_VEXT_VX(vmul_vx_b, 1, 1) -GEN_VEXT_VX(vmul_vx_h, 2, 2) -GEN_VEXT_VX(vmul_vx_w, 4, 4) -GEN_VEXT_VX(vmul_vx_d, 8, 8) -GEN_VEXT_VX(vmulh_vx_b, 1, 1) -GEN_VEXT_VX(vmulh_vx_h, 2, 2) -GEN_VEXT_VX(vmulh_vx_w, 4, 4) -GEN_VEXT_VX(vmulh_vx_d, 8, 8) -GEN_VEXT_VX(vmulhu_vx_b, 1, 1) -GEN_VEXT_VX(vmulhu_vx_h, 2, 2) -GEN_VEXT_VX(vmulhu_vx_w, 4, 4) -GEN_VEXT_VX(vmulhu_vx_d, 8, 8) -GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 
-GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) -GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) -GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) +GEN_VEXT_VX(vmul_vx_b) +GEN_VEXT_VX(vmul_vx_h) +GEN_VEXT_VX(vmul_vx_w) +GEN_VEXT_VX(vmul_vx_d) +GEN_VEXT_VX(vmulh_vx_b) +GEN_VEXT_VX(vmulh_vx_h) +GEN_VEXT_VX(vmulh_vx_w) +GEN_VEXT_VX(vmulh_vx_d) +GEN_VEXT_VX(vmulhu_vx_b) +GEN_VEXT_VX(vmulhu_vx_h) +GEN_VEXT_VX(vmulhu_vx_w) +GEN_VEXT_VX(vmulhu_vx_d) +GEN_VEXT_VX(vmulhsu_vx_b) +GEN_VEXT_VX(vmulhsu_vx_h) +GEN_VEXT_VX(vmulhsu_vx_w) +GEN_VEXT_VX(vmulhsu_vx_d) =20 /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M =3D=3D 0) ? (__typeof(N))(-1) : N / M) @@ -1581,22 +1579,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO= _REM) RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) -GEN_VEXT_VV(vdivu_vv_b, 1, 1) -GEN_VEXT_VV(vdivu_vv_h, 2, 2) -GEN_VEXT_VV(vdivu_vv_w, 4, 4) -GEN_VEXT_VV(vdivu_vv_d, 8, 8) -GEN_VEXT_VV(vdiv_vv_b, 1, 1) -GEN_VEXT_VV(vdiv_vv_h, 2, 2) -GEN_VEXT_VV(vdiv_vv_w, 4, 4) -GEN_VEXT_VV(vdiv_vv_d, 8, 8) -GEN_VEXT_VV(vremu_vv_b, 1, 1) -GEN_VEXT_VV(vremu_vv_h, 2, 2) -GEN_VEXT_VV(vremu_vv_w, 4, 4) -GEN_VEXT_VV(vremu_vv_d, 8, 8) -GEN_VEXT_VV(vrem_vv_b, 1, 1) -GEN_VEXT_VV(vrem_vv_h, 2, 2) -GEN_VEXT_VV(vrem_vv_w, 4, 4) -GEN_VEXT_VV(vrem_vv_d, 8, 8) +GEN_VEXT_VV(vdivu_vv_b) +GEN_VEXT_VV(vdivu_vv_h) +GEN_VEXT_VV(vdivu_vv_w) +GEN_VEXT_VV(vdivu_vv_d) +GEN_VEXT_VV(vdiv_vv_b) +GEN_VEXT_VV(vdiv_vv_h) +GEN_VEXT_VV(vdiv_vv_w) +GEN_VEXT_VV(vdiv_vv_d) +GEN_VEXT_VV(vremu_vv_b) +GEN_VEXT_VV(vremu_vv_h) +GEN_VEXT_VV(vremu_vv_w) +GEN_VEXT_VV(vremu_vv_d) +GEN_VEXT_VV(vrem_vv_b) +GEN_VEXT_VV(vrem_vv_h) +GEN_VEXT_VV(vrem_vv_w) +GEN_VEXT_VV(vrem_vv_d) =20 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) @@ -1614,22 +1612,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, 
DO_REM) RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) -GEN_VEXT_VX(vdivu_vx_b, 1, 1) -GEN_VEXT_VX(vdivu_vx_h, 2, 2) -GEN_VEXT_VX(vdivu_vx_w, 4, 4) -GEN_VEXT_VX(vdivu_vx_d, 8, 8) -GEN_VEXT_VX(vdiv_vx_b, 1, 1) -GEN_VEXT_VX(vdiv_vx_h, 2, 2) -GEN_VEXT_VX(vdiv_vx_w, 4, 4) -GEN_VEXT_VX(vdiv_vx_d, 8, 8) -GEN_VEXT_VX(vremu_vx_b, 1, 1) -GEN_VEXT_VX(vremu_vx_h, 2, 2) -GEN_VEXT_VX(vremu_vx_w, 4, 4) -GEN_VEXT_VX(vremu_vx_d, 8, 8) -GEN_VEXT_VX(vrem_vx_b, 1, 1) -GEN_VEXT_VX(vrem_vx_h, 2, 2) -GEN_VEXT_VX(vrem_vx_w, 4, 4) -GEN_VEXT_VX(vrem_vx_d, 8, 8) +GEN_VEXT_VX(vdivu_vx_b) +GEN_VEXT_VX(vdivu_vx_h) +GEN_VEXT_VX(vdivu_vx_w) +GEN_VEXT_VX(vdivu_vx_d) +GEN_VEXT_VX(vdiv_vx_b) +GEN_VEXT_VX(vdiv_vx_h) +GEN_VEXT_VX(vdiv_vx_w) +GEN_VEXT_VX(vdiv_vx_d) +GEN_VEXT_VX(vremu_vx_b) +GEN_VEXT_VX(vremu_vx_h) +GEN_VEXT_VX(vremu_vx_w) +GEN_VEXT_VX(vremu_vx_d) +GEN_VEXT_VX(vrem_vx_b) +GEN_VEXT_VX(vrem_vx_h) +GEN_VEXT_VX(vrem_vx_w) +GEN_VEXT_VX(vrem_vx_d) =20 /* Vector Widening Integer Multiply Instructions */ RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) @@ -1641,15 +1639,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4,= DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) -GEN_VEXT_VV(vwmul_vv_b, 1, 2) -GEN_VEXT_VV(vwmul_vv_h, 2, 4) -GEN_VEXT_VV(vwmul_vv_w, 4, 8) -GEN_VEXT_VV(vwmulu_vv_b, 1, 2) -GEN_VEXT_VV(vwmulu_vv_h, 2, 4) -GEN_VEXT_VV(vwmulu_vv_w, 4, 8) -GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) -GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) -GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) +GEN_VEXT_VV(vwmul_vv_b) +GEN_VEXT_VV(vwmul_vv_h) +GEN_VEXT_VV(vwmul_vv_w) +GEN_VEXT_VV(vwmulu_vv_b) +GEN_VEXT_VV(vwmulu_vv_h) +GEN_VEXT_VV(vwmulu_vv_w) +GEN_VEXT_VV(vwmulsu_vv_b) +GEN_VEXT_VV(vwmulsu_vv_h) +GEN_VEXT_VV(vwmulsu_vv_w) =20 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, 
H4, H2, DO_MUL) @@ -1660,15 +1658,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_= MUL) RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) -GEN_VEXT_VX(vwmul_vx_b, 1, 2) -GEN_VEXT_VX(vwmul_vx_h, 2, 4) -GEN_VEXT_VX(vwmul_vx_w, 4, 8) -GEN_VEXT_VX(vwmulu_vx_b, 1, 2) -GEN_VEXT_VX(vwmulu_vx_h, 2, 4) -GEN_VEXT_VX(vwmulu_vx_w, 4, 8) -GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) -GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) -GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) +GEN_VEXT_VX(vwmul_vx_b) +GEN_VEXT_VX(vwmul_vx_h) +GEN_VEXT_VX(vwmul_vx_w) +GEN_VEXT_VX(vwmulu_vx_b) +GEN_VEXT_VX(vwmulu_vx_h) +GEN_VEXT_VX(vwmulu_vx_w) +GEN_VEXT_VX(vwmulsu_vx_b) +GEN_VEXT_VX(vwmulsu_vx_h) +GEN_VEXT_VX(vwmulsu_vx_w) =20 /* Vector Single-Width Integer Multiply-Add Instructions */ #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -1700,22 +1698,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, = DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) -GEN_VEXT_VV(vmacc_vv_b, 1, 1) -GEN_VEXT_VV(vmacc_vv_h, 2, 2) -GEN_VEXT_VV(vmacc_vv_w, 4, 4) -GEN_VEXT_VV(vmacc_vv_d, 8, 8) -GEN_VEXT_VV(vnmsac_vv_b, 1, 1) -GEN_VEXT_VV(vnmsac_vv_h, 2, 2) -GEN_VEXT_VV(vnmsac_vv_w, 4, 4) -GEN_VEXT_VV(vnmsac_vv_d, 8, 8) -GEN_VEXT_VV(vmadd_vv_b, 1, 1) -GEN_VEXT_VV(vmadd_vv_h, 2, 2) -GEN_VEXT_VV(vmadd_vv_w, 4, 4) -GEN_VEXT_VV(vmadd_vv_d, 8, 8) -GEN_VEXT_VV(vnmsub_vv_b, 1, 1) -GEN_VEXT_VV(vnmsub_vv_h, 2, 2) -GEN_VEXT_VV(vnmsub_vv_w, 4, 4) -GEN_VEXT_VV(vnmsub_vv_d, 8, 8) +GEN_VEXT_VV(vmacc_vv_b) +GEN_VEXT_VV(vmacc_vv_h) +GEN_VEXT_VV(vmacc_vv_w) +GEN_VEXT_VV(vmacc_vv_d) +GEN_VEXT_VV(vnmsac_vv_b) +GEN_VEXT_VV(vnmsac_vv_h) +GEN_VEXT_VV(vnmsac_vv_w) +GEN_VEXT_VV(vnmsac_vv_d) +GEN_VEXT_VV(vmadd_vv_b) +GEN_VEXT_VV(vmadd_vv_h) +GEN_VEXT_VV(vmadd_vv_w) +GEN_VEXT_VV(vmadd_vv_d) 
+GEN_VEXT_VV(vnmsub_vv_b) +GEN_VEXT_VV(vnmsub_vv_h) +GEN_VEXT_VV(vnmsub_vv_w) +GEN_VEXT_VV(vnmsub_vv_d) =20 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ @@ -1741,22 +1739,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_N= MSUB) RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) -GEN_VEXT_VX(vmacc_vx_b, 1, 1) -GEN_VEXT_VX(vmacc_vx_h, 2, 2) -GEN_VEXT_VX(vmacc_vx_w, 4, 4) -GEN_VEXT_VX(vmacc_vx_d, 8, 8) -GEN_VEXT_VX(vnmsac_vx_b, 1, 1) -GEN_VEXT_VX(vnmsac_vx_h, 2, 2) -GEN_VEXT_VX(vnmsac_vx_w, 4, 4) -GEN_VEXT_VX(vnmsac_vx_d, 8, 8) -GEN_VEXT_VX(vmadd_vx_b, 1, 1) -GEN_VEXT_VX(vmadd_vx_h, 2, 2) -GEN_VEXT_VX(vmadd_vx_w, 4, 4) -GEN_VEXT_VX(vmadd_vx_d, 8, 8) -GEN_VEXT_VX(vnmsub_vx_b, 1, 1) -GEN_VEXT_VX(vnmsub_vx_h, 2, 2) -GEN_VEXT_VX(vnmsub_vx_w, 4, 4) -GEN_VEXT_VX(vnmsub_vx_d, 8, 8) +GEN_VEXT_VX(vmacc_vx_b) +GEN_VEXT_VX(vmacc_vx_h) +GEN_VEXT_VX(vmacc_vx_w) +GEN_VEXT_VX(vmacc_vx_d) +GEN_VEXT_VX(vnmsac_vx_b) +GEN_VEXT_VX(vnmsac_vx_h) +GEN_VEXT_VX(vnmsac_vx_w) +GEN_VEXT_VX(vnmsac_vx_d) +GEN_VEXT_VX(vmadd_vx_b) +GEN_VEXT_VX(vmadd_vx_h) +GEN_VEXT_VX(vmadd_vx_w) +GEN_VEXT_VX(vmadd_vx_d) +GEN_VEXT_VX(vnmsub_vx_b) +GEN_VEXT_VX(vnmsub_vx_h) +GEN_VEXT_VX(vnmsub_vx_w) +GEN_VEXT_VX(vnmsub_vx_d) =20 /* Vector Widening Integer Multiply-Add Instructions */ RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) @@ -1768,15 +1766,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4,= DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) -GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) -GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) -GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) -GEN_VEXT_VV(vwmacc_vv_b, 1, 2) -GEN_VEXT_VV(vwmacc_vv_h, 2, 4) -GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 
-GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) -GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) -GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) +GEN_VEXT_VV(vwmaccu_vv_b) +GEN_VEXT_VV(vwmaccu_vv_h) +GEN_VEXT_VV(vwmaccu_vv_w) +GEN_VEXT_VV(vwmacc_vv_b) +GEN_VEXT_VV(vwmacc_vv_h) +GEN_VEXT_VV(vwmacc_vv_w) +GEN_VEXT_VV(vwmaccsu_vv_b) +GEN_VEXT_VV(vwmaccsu_vv_h) +GEN_VEXT_VV(vwmaccsu_vv_w) =20 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) @@ -1790,18 +1788,18 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, D= O_MACC) RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) -GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) -GEN_VEXT_VX(vwmacc_vx_b, 1, 2) -GEN_VEXT_VX(vwmacc_vx_h, 2, 4) -GEN_VEXT_VX(vwmacc_vx_w, 4, 8) -GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) -GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) +GEN_VEXT_VX(vwmaccu_vx_b) +GEN_VEXT_VX(vwmaccu_vx_h) +GEN_VEXT_VX(vwmaccu_vx_w) +GEN_VEXT_VX(vwmacc_vx_b) +GEN_VEXT_VX(vwmacc_vx_h) +GEN_VEXT_VX(vwmacc_vx_w) +GEN_VEXT_VX(vwmaccsu_vx_b) +GEN_VEXT_VX(vwmaccsu_vx_h) +GEN_VEXT_VX(vwmaccsu_vx_w) +GEN_VEXT_VX(vwmaccus_vx_b) +GEN_VEXT_VX(vwmaccus_vx_h) +GEN_VEXT_VX(vwmaccus_vx_w) =20 /* Vector Integer Merge and Move Instructions */ #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ @@ -1922,7 +1920,7 @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, static inline void vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, - uint32_t desc, uint32_t esz, uint32_t dsz, + uint32_t desc, opivv2_rm_fn *fn) { uint32_t vm =3D vext_vm(desc); @@ -1949,11 +1947,11 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *v= s2, } =20 /* generate helpers for fixed point instructions with OPIVV format */ -#define 
GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV_RM(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ do_##NAME); \ } =20 @@ -2004,10 +2002,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H= 1, saddu8) RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) -GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsaddu_vv_b) +GEN_VEXT_VV_RM(vsaddu_vv_h) +GEN_VEXT_VV_RM(vsaddu_vv_w) +GEN_VEXT_VV_RM(vsaddu_vv_d) =20 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, CPURISCVState *env, int vxrm); @@ -2039,7 +2037,7 @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void= *vs2, static inline void vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, - uint32_t desc, uint32_t esz, uint32_t dsz, + uint32_t desc, opivx2_rm_fn *fn) { uint32_t vm =3D vext_vm(desc); @@ -2066,11 +2064,11 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, vo= id *vs2, } =20 /* generate helpers for fixed point instructions with OPIVX format */ -#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VX_RM(NAME) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ - vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ do_##NAME); \ } =20 @@ -2078,10 +2076,10 @@ RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, s= addu8) RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) -GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 
-GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsaddu_vx_b) +GEN_VEXT_VX_RM(vsaddu_vx_h) +GEN_VEXT_VX_RM(vsaddu_vx_w) +GEN_VEXT_VX_RM(vsaddu_vx_d) =20 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t = b) { @@ -2127,19 +2125,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1= , sadd8) RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) -GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsadd_vv_b) +GEN_VEXT_VV_RM(vsadd_vv_h) +GEN_VEXT_VV_RM(vsadd_vv_w) +GEN_VEXT_VV_RM(vsadd_vv_d) =20 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) -GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsadd_vx_b) +GEN_VEXT_VX_RM(vsadd_vx_h) +GEN_VEXT_VX_RM(vsadd_vx_w) +GEN_VEXT_VX_RM(vsadd_vx_d) =20 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint= 8_t b) { @@ -2188,19 +2186,19 @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H= 1, ssubu8) RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) -GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssubu_vv_b) +GEN_VEXT_VV_RM(vssubu_vv_h) +GEN_VEXT_VV_RM(vssubu_vv_w) +GEN_VEXT_VV_RM(vssubu_vv_d) =20 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) -GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssubu_vx_b) +GEN_VEXT_VX_RM(vssubu_vx_h) +GEN_VEXT_VX_RM(vssubu_vx_w) +GEN_VEXT_VX_RM(vssubu_vx_d) =20 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t = b) { @@ -2246,19 +2244,19 @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1= , ssub8) RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) -GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssub_vv_b) +GEN_VEXT_VV_RM(vssub_vv_h) +GEN_VEXT_VV_RM(vssub_vv_w) +GEN_VEXT_VV_RM(vssub_vv_d) =20 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) -GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssub_vx_b) +GEN_VEXT_VX_RM(vssub_vx_h) +GEN_VEXT_VX_RM(vssub_vx_w) +GEN_VEXT_VX_RM(vssub_vx_d) =20 /* Vector Single-Width Averaging Add and Subtract */ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) @@ -2310,19 +2308,19 @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1= , aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) -GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) -GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 
-GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) -GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) +GEN_VEXT_VV_RM(vaadd_vv_b) +GEN_VEXT_VV_RM(vaadd_vv_h) +GEN_VEXT_VV_RM(vaadd_vv_w) +GEN_VEXT_VV_RM(vaadd_vv_d) =20 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) -GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) -GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) -GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) -GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) +GEN_VEXT_VX_RM(vaadd_vx_b) +GEN_VEXT_VX_RM(vaadd_vx_h) +GEN_VEXT_VX_RM(vaadd_vx_w) +GEN_VEXT_VX_RM(vaadd_vx_d) =20 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2347,19 +2345,19 @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H= 1, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) -GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vaaddu_vv_b) +GEN_VEXT_VV_RM(vaaddu_vv_h) +GEN_VEXT_VV_RM(vaaddu_vv_w) +GEN_VEXT_VV_RM(vaaddu_vv_d) =20 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) -GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vaaddu_vx_b) +GEN_VEXT_VX_RM(vaaddu_vx_h) +GEN_VEXT_VX_RM(vaaddu_vx_w) +GEN_VEXT_VX_RM(vaaddu_vx_d) =20 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int3= 2_t b) { @@ -2383,19 +2381,19 @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1= , asub32) RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, 
H2, H2, asub32) RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) -GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) -GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) -GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) -GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) +GEN_VEXT_VV_RM(vasub_vv_b) +GEN_VEXT_VV_RM(vasub_vv_h) +GEN_VEXT_VV_RM(vasub_vv_w) +GEN_VEXT_VV_RM(vasub_vv_d) =20 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) -GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) -GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) -GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) -GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) +GEN_VEXT_VX_RM(vasub_vx_b) +GEN_VEXT_VX_RM(vasub_vx_h) +GEN_VEXT_VX_RM(vasub_vx_w) +GEN_VEXT_VX_RM(vasub_vx_d) =20 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2420,19 +2418,19 @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H= 1, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) -GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vasubu_vv_b) +GEN_VEXT_VV_RM(vasubu_vv_h) +GEN_VEXT_VV_RM(vasubu_vv_w) +GEN_VEXT_VV_RM(vasubu_vv_d) =20 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) -GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vasubu_vx_b) +GEN_VEXT_VX_RM(vasubu_vx_h) +GEN_VEXT_VX_RM(vasubu_vx_w) +GEN_VEXT_VX_RM(vasubu_vx_d) =20 /* Vector 
Single-Width Fractional Multiply with Rounding and Saturation */ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t= b) @@ -2527,19 +2525,19 @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1= , vsmul8) RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) -GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsmul_vv_b) +GEN_VEXT_VV_RM(vsmul_vv_h) +GEN_VEXT_VV_RM(vsmul_vv_w) +GEN_VEXT_VV_RM(vsmul_vv_d) =20 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) -GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsmul_vx_b) +GEN_VEXT_VX_RM(vsmul_vx_h) +GEN_VEXT_VX_RM(vsmul_vx_w) +GEN_VEXT_VX_RM(vsmul_vx_d) =20 /* Vector Single-Width Scaling Shift Instructions */ static inline uint8_t @@ -2586,19 +2584,19 @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1= , vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) -GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssrl_vv_b) +GEN_VEXT_VV_RM(vssrl_vv_h) +GEN_VEXT_VV_RM(vssrl_vv_w) +GEN_VEXT_VV_RM(vssrl_vv_d) =20 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 
-GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssrl_vx_b) +GEN_VEXT_VX_RM(vssrl_vx_h) +GEN_VEXT_VX_RM(vssrl_vx_w) +GEN_VEXT_VX_RM(vssrl_vx_d) =20 static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -2645,19 +2643,19 @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1= , vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) -GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssra_vv_b) +GEN_VEXT_VV_RM(vssra_vv_h) +GEN_VEXT_VV_RM(vssra_vv_w) +GEN_VEXT_VV_RM(vssra_vv_d) =20 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) -GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssra_vx_b) +GEN_VEXT_VX_RM(vssra_vx_h) +GEN_VEXT_VX_RM(vssra_vx_w) +GEN_VEXT_VX_RM(vssra_vx_d) =20 /* Vector Narrowing Fixed-Point Clip Instructions */ static inline int8_t @@ -2720,16 +2718,16 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, i= nt32_t b) RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) -GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) -GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) -GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) +GEN_VEXT_VV_RM(vnclip_wv_b) +GEN_VEXT_VV_RM(vnclip_wv_h) +GEN_VEXT_VV_RM(vnclip_wv_w) =20 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, 
vnclip16) RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) -GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1) -GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2) -GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4) +GEN_VEXT_VX_RM(vnclip_wx_b) +GEN_VEXT_VX_RM(vnclip_wx_h) +GEN_VEXT_VX_RM(vnclip_wx_w) =20 static inline uint8_t vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) @@ -2782,16 +2780,16 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a,= uint32_t b) RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) -GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1) -GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2) -GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4) +GEN_VEXT_VV_RM(vnclipu_wv_b) +GEN_VEXT_VV_RM(vnclipu_wv_h) +GEN_VEXT_VV_RM(vnclipu_wv_w) =20 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) -GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1) -GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2) -GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4) +GEN_VEXT_VX_RM(vnclipu_wx_b) +GEN_VEXT_VX_RM(vnclipu_wx_h) +GEN_VEXT_VX_RM(vnclipu_wx_w) =20 /* *** Vector Float Point Arithmetic Instructions @@ -2806,7 +2804,7 @@ static void do_##NAME(void *vd, void *vs1, void *vs2,= int i, \ *((TD *)vd + HD(i)) =3D OP(s2, s1, &env->fp_status); \ } =20 -#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV_ENV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ @@ -2827,9 +2825,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) -GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 
+GEN_VEXT_VV_ENV(vfadd_vv_h) +GEN_VEXT_VV_ENV(vfadd_vv_w) +GEN_VEXT_VV_ENV(vfadd_vv_d) =20 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -2839,7 +2837,7 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs= 2, int i, \ *((TD *)vd + HD(i)) =3D OP(s2, (TX1)(T1)s1, &env->fp_status);\ } =20 -#define GEN_VEXT_VF(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VF(NAME) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ @@ -2860,22 +2858,22 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, = \ RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) -GEN_VEXT_VF(vfadd_vf_h, 2, 2) -GEN_VEXT_VF(vfadd_vf_w, 4, 4) -GEN_VEXT_VF(vfadd_vf_d, 8, 8) +GEN_VEXT_VF(vfadd_vf_h) +GEN_VEXT_VF(vfadd_vf_w) +GEN_VEXT_VF(vfadd_vf_d) =20 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) -GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsub_vv_h) +GEN_VEXT_VV_ENV(vfsub_vv_w) +GEN_VEXT_VV_ENV(vfsub_vv_d) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) -GEN_VEXT_VF(vfsub_vf_h, 2, 2) -GEN_VEXT_VF(vfsub_vf_w, 4, 4) -GEN_VEXT_VF(vfsub_vf_d, 8, 8) +GEN_VEXT_VF(vfsub_vf_h) +GEN_VEXT_VF(vfsub_vf_w) +GEN_VEXT_VF(vfsub_vf_d) =20 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { @@ -2895,9 +2893,9 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, = float_status *s) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) -GEN_VEXT_VF(vfrsub_vf_h, 2, 2) -GEN_VEXT_VF(vfrsub_vf_w, 4, 4) -GEN_VEXT_VF(vfrsub_vf_d, 8, 8) +GEN_VEXT_VF(vfrsub_vf_h) +GEN_VEXT_VF(vfrsub_vf_w) +GEN_VEXT_VF(vfrsub_vf_d) =20 /* Vector Widening Floating-Point Add/Subtract Instructions */ static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) @@ -2915,12 +2913,12 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, fl= oat_status *s) =20 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) -GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwadd_vv_h) +GEN_VEXT_VV_ENV(vfwadd_vv_w) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) -GEN_VEXT_VF(vfwadd_vf_h, 2, 4) -GEN_VEXT_VF(vfwadd_vf_w, 4, 8) +GEN_VEXT_VF(vfwadd_vf_h) +GEN_VEXT_VF(vfwadd_vf_w) =20 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { @@ -2937,12 +2935,12 @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, fl= oat_status *s) =20 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) -GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwsub_vv_h) +GEN_VEXT_VV_ENV(vfwsub_vv_w) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) -GEN_VEXT_VF(vfwsub_vf_h, 2, 4) -GEN_VEXT_VF(vfwsub_vf_w, 4, 8) +GEN_VEXT_VF(vfwsub_vf_h) +GEN_VEXT_VF(vfwsub_vf_w) =20 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { @@ -2956,12 +2954,12 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, f= loat_status *s) =20 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) -GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 
+GEN_VEXT_VV_ENV(vfwadd_wv_h) +GEN_VEXT_VV_ENV(vfwadd_wv_w) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) -GEN_VEXT_VF(vfwadd_wf_h, 2, 4) -GEN_VEXT_VF(vfwadd_wf_w, 4, 8) +GEN_VEXT_VF(vfwadd_wf_h) +GEN_VEXT_VF(vfwadd_wf_w) =20 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { @@ -2975,39 +2973,39 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, f= loat_status *s) =20 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) -GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwsub_wv_h) +GEN_VEXT_VV_ENV(vfwsub_wv_w) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) -GEN_VEXT_VF(vfwsub_wf_h, 2, 4) -GEN_VEXT_VF(vfwsub_wf_w, 4, 8) +GEN_VEXT_VF(vfwsub_wf_h) +GEN_VEXT_VF(vfwsub_wf_w) =20 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) -GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmul_vv_h) +GEN_VEXT_VV_ENV(vfmul_vv_w) +GEN_VEXT_VV_ENV(vfmul_vv_d) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) -GEN_VEXT_VF(vfmul_vf_h, 2, 2) -GEN_VEXT_VF(vfmul_vf_w, 4, 4) -GEN_VEXT_VF(vfmul_vf_d, 8, 8) +GEN_VEXT_VF(vfmul_vf_h) +GEN_VEXT_VF(vfmul_vf_w) +GEN_VEXT_VF(vfmul_vf_d) =20 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) -GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 
-GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfdiv_vv_h) +GEN_VEXT_VV_ENV(vfdiv_vv_w) +GEN_VEXT_VV_ENV(vfdiv_vv_d) RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) -GEN_VEXT_VF(vfdiv_vf_h, 2, 2) -GEN_VEXT_VF(vfdiv_vf_w, 4, 4) -GEN_VEXT_VF(vfdiv_vf_d, 8, 8) +GEN_VEXT_VF(vfdiv_vf_h) +GEN_VEXT_VF(vfdiv_vf_w) +GEN_VEXT_VF(vfdiv_vf_d) =20 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) { @@ -3027,9 +3025,9 @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, = float_status *s) RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) -GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) -GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) -GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) +GEN_VEXT_VF(vfrdiv_vf_h) +GEN_VEXT_VF(vfrdiv_vf_w) +GEN_VEXT_VF(vfrdiv_vf_d) =20 /* Vector Widening Floating-Point Multiply */ static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) @@ -3046,12 +3044,12 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, fl= oat_status *s) } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) -GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmul_vv_h) +GEN_VEXT_VV_ENV(vfwmul_vv_w) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) -GEN_VEXT_VF(vfwmul_vf_h, 2, 4) -GEN_VEXT_VF(vfwmul_vf_w, 4, 8) +GEN_VEXT_VF(vfwmul_vf_h) +GEN_VEXT_VF(vfwmul_vf_w) =20 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -3082,9 +3080,9 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint6= 4_t d, float_status *s) RVVCALL(OPFVV3, vfmacc_vv_h, 
OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) -GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmacc_vv_h) +GEN_VEXT_VV_ENV(vfmacc_vv_w) +GEN_VEXT_VV_ENV(vfmacc_vv_d) =20 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -3098,9 +3096,9 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs= 2, int i, \ RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) -GEN_VEXT_VF(vfmacc_vf_h, 2, 2) -GEN_VEXT_VF(vfmacc_vf_w, 4, 4) -GEN_VEXT_VF(vfmacc_vf_d, 8, 8) +GEN_VEXT_VF(vfmacc_vf_h) +GEN_VEXT_VF(vfmacc_vf_w) +GEN_VEXT_VF(vfmacc_vf_d) =20 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3123,15 +3121,15 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) -GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmacc_vv_h) +GEN_VEXT_VV_ENV(vfnmacc_vv_w) +GEN_VEXT_VV_ENV(vfnmacc_vv_d) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) -GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) -GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) -GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) +GEN_VEXT_VF(vfnmacc_vf_h) +GEN_VEXT_VF(vfnmacc_vf_w) +GEN_VEXT_VF(vfnmacc_vf_d) =20 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3151,15 +3149,15 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, 
uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) -GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmsac_vv_h) +GEN_VEXT_VV_ENV(vfmsac_vv_w) +GEN_VEXT_VV_ENV(vfmsac_vv_d) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) -GEN_VEXT_VF(vfmsac_vf_h, 2, 2) -GEN_VEXT_VF(vfmsac_vf_w, 4, 4) -GEN_VEXT_VF(vfmsac_vf_d, 8, 8) +GEN_VEXT_VF(vfmsac_vf_h) +GEN_VEXT_VF(vfmsac_vf_w) +GEN_VEXT_VF(vfmsac_vf_d) =20 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3179,15 +3177,15 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) -GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmsac_vv_h) +GEN_VEXT_VV_ENV(vfnmsac_vv_w) +GEN_VEXT_VV_ENV(vfnmsac_vv_d) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) -GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) -GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) -GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) +GEN_VEXT_VF(vfnmsac_vf_h) +GEN_VEXT_VF(vfnmsac_vf_w) +GEN_VEXT_VF(vfnmsac_vf_d) =20 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3207,15 +3205,15 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) -GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmadd_vv_h) +GEN_VEXT_VV_ENV(vfmadd_vv_w) +GEN_VEXT_VV_ENV(vfmadd_vv_d) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) -GEN_VEXT_VF(vfmadd_vf_h, 2, 2) -GEN_VEXT_VF(vfmadd_vf_w, 4, 4) -GEN_VEXT_VF(vfmadd_vf_d, 8, 8) +GEN_VEXT_VF(vfmadd_vf_h) +GEN_VEXT_VF(vfmadd_vf_w) +GEN_VEXT_VF(vfmadd_vf_d) =20 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3238,15 +3236,15 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) -GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmadd_vv_h) +GEN_VEXT_VV_ENV(vfnmadd_vv_w) +GEN_VEXT_VV_ENV(vfnmadd_vv_d) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) -GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) -GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) -GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) +GEN_VEXT_VF(vfnmadd_vf_h) +GEN_VEXT_VF(vfnmadd_vf_w) +GEN_VEXT_VF(vfnmadd_vf_d) =20 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3266,15 +3264,15 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) -GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 
-GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmsub_vv_h) +GEN_VEXT_VV_ENV(vfmsub_vv_w) +GEN_VEXT_VV_ENV(vfmsub_vv_d) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) -GEN_VEXT_VF(vfmsub_vf_h, 2, 2) -GEN_VEXT_VF(vfmsub_vf_w, 4, 4) -GEN_VEXT_VF(vfmsub_vf_d, 8, 8) +GEN_VEXT_VF(vfmsub_vf_h) +GEN_VEXT_VF(vfmsub_vf_w) +GEN_VEXT_VF(vfmsub_vf_d) =20 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3294,15 +3292,15 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) -GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmsub_vv_h) +GEN_VEXT_VV_ENV(vfnmsub_vv_w) +GEN_VEXT_VV_ENV(vfnmsub_vv_d) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) -GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) -GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) -GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) +GEN_VEXT_VF(vfnmsub_vf_h) +GEN_VEXT_VF(vfnmsub_vf_w) +GEN_VEXT_VF(vfnmsub_vf_d) =20 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status = *s) @@ -3319,12 +3317,12 @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, ui= nt64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) -GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmacc_vv_h) +GEN_VEXT_VV_ENV(vfwmacc_vv_w) RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) RVVCALL(OPFVF3, 
vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) -GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) -GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) +GEN_VEXT_VF(vfwmacc_vf_h) +GEN_VEXT_VF(vfwmacc_vf_w) =20 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status= *s) { @@ -3342,12 +3340,12 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, u= int64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) -GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) -GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) -GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) +GEN_VEXT_VF(vfwnmacc_vf_h) +GEN_VEXT_VF(vfwnmacc_vf_w) =20 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status = *s) { @@ -3365,12 +3363,12 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, ui= nt64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) -GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmsac_vv_h) +GEN_VEXT_VV_ENV(vfwmsac_vv_w) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) -GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) -GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) +GEN_VEXT_VF(vfwmsac_vf_h) +GEN_VEXT_VF(vfwmsac_vf_w) =20 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status= *s) { @@ -3388,12 +3386,12 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, u= int64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) -GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 
+GEN_VEXT_VV_ENV(vfwnmsac_vv_h) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) -GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) -GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) +GEN_VEXT_VF(vfwnmsac_vf_h) +GEN_VEXT_VF(vfwnmsac_vf_w) =20 /* Vector Floating-Point Square-Root Instruction */ /* (TD, T2, TX2) */ @@ -3409,7 +3407,7 @@ static void do_##NAME(void *vd, void *vs2, int i, = \ *((TD *)vd + HD(i)) =3D OP(s2, &env->fp_status); \ } =20 -#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_V_ENV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ @@ -3432,9 +3430,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) -GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) -GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) -GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) +GEN_VEXT_V_ENV(vfsqrt_v_h) +GEN_VEXT_V_ENV(vfsqrt_v_w) +GEN_VEXT_V_ENV(vfsqrt_v_d) =20 /* * Vector Floating-Point Reciprocal Square-Root Estimate Instruction @@ -3614,9 +3612,9 @@ static float64 frsqrt7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) -GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) -GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) -GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) +GEN_VEXT_V_ENV(vfrsqrt7_v_h) +GEN_VEXT_V_ENV(vfrsqrt7_v_w) +GEN_VEXT_V_ENV(vfrsqrt7_v_d) =20 /* * Vector Floating-Point Reciprocal Estimate Instruction @@ -3805,36 +3803,36 @@ static float64 frec7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) -GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) 
-GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) -GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) +GEN_VEXT_V_ENV(vfrec7_v_h) +GEN_VEXT_V_ENV(vfrec7_v_w) +GEN_VEXT_V_ENV(vfrec7_v_d) =20 /* Vector Floating-Point MIN/MAX Instructions */ RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) -GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmin_vv_h) +GEN_VEXT_VV_ENV(vfmin_vv_w) +GEN_VEXT_VV_ENV(vfmin_vv_d) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) -GEN_VEXT_VF(vfmin_vf_h, 2, 2) -GEN_VEXT_VF(vfmin_vf_w, 4, 4) -GEN_VEXT_VF(vfmin_vf_d, 8, 8) +GEN_VEXT_VF(vfmin_vf_h) +GEN_VEXT_VF(vfmin_vf_w) +GEN_VEXT_VF(vfmin_vf_d) =20 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) -GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmax_vv_h) +GEN_VEXT_VV_ENV(vfmax_vv_w) +GEN_VEXT_VV_ENV(vfmax_vv_d) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) -GEN_VEXT_VF(vfmax_vf_h, 2, 2) -GEN_VEXT_VF(vfmax_vf_w, 4, 4) -GEN_VEXT_VF(vfmax_vf_d, 8, 8) +GEN_VEXT_VF(vfmax_vf_h) +GEN_VEXT_VF(vfmax_vf_w) +GEN_VEXT_VF(vfmax_vf_d) =20 /* Vector Floating-Point Sign-Injection Instructions */ static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) @@ -3855,15 +3853,15 @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, flo= 
at_status *s) RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) -GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnj_vv_h) +GEN_VEXT_VV_ENV(vfsgnj_vv_w) +GEN_VEXT_VV_ENV(vfsgnj_vv_d) RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) -GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnj_vf_h) +GEN_VEXT_VF(vfsgnj_vf_w) +GEN_VEXT_VF(vfsgnj_vf_d) =20 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) { @@ -3883,15 +3881,15 @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, fl= oat_status *s) RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) -GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d) RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) -GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnjn_vf_h) +GEN_VEXT_VF(vfsgnjn_vf_w) +GEN_VEXT_VF(vfsgnjn_vf_d) =20 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) { @@ -3911,15 +3909,15 @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, fl= oat_status *s) RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 
-GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d) RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) -GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnjx_vf_h) +GEN_VEXT_VF(vfsgnjx_vf_w) +GEN_VEXT_VF(vfsgnjx_vf_d) =20 /* Vector Floating-Point Compare Instructions */ #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ @@ -4063,7 +4061,7 @@ static void do_##NAME(void *vd, void *vs2, int i) = \ *((TD *)vd + HD(i)) =3D OP(s2); \ } =20 -#define GEN_VEXT_V(NAME, ESZ, DSZ) \ +#define GEN_VEXT_V(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ @@ -4140,9 +4138,9 @@ target_ulong fclass_d(uint64_t frs1) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) -GEN_VEXT_V(vfclass_v_h, 2, 2) -GEN_VEXT_V(vfclass_v_w, 4, 4) -GEN_VEXT_V(vfclass_v_d, 8, 8) +GEN_VEXT_V(vfclass_v_h) +GEN_VEXT_V(vfclass_v_w) +GEN_VEXT_V(vfclass_v_d) =20 /* Vector Floating-Point Merge Instruction */ #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ @@ -4170,33 +4168,33 @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) =20 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/ RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) -GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d) =20 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) =20 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d) =20 /* Widening Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4206,30 +4204,30 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned inte= ger.*/ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) =20 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer= . 
*/ RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) =20 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width fl= oat */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) =20 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) =20 /* * vfwcvt.f.f.v vd, vs2, vm @@ -4242,8 +4240,8 @@ static uint32_t vfwcvtffv16(uint16_t a, float_status = *s) =20 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) =20 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4254,29 +4252,29 @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, 
NOP_UU_W, H4, H8, float64_to_uint32) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) =20 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer= . */ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) -GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) -GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b) +GEN_VEXT_V_ENV(vfncvt_x_f_w_h) +GEN_VEXT_V_ENV(vfncvt_x_f_w_w) =20 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to fl= oat */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) =20 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_x_w_h) +GEN_VEXT_V_ENV(vfncvt_f_x_w_w) =20 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
= */ static uint16_t vfncvtffv16(uint32_t a, float_status *s) @@ -4286,8 +4284,8 @@ static uint16_t vfncvtffv16(uint32_t a, float_status = *s) =20 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_f_w_h) +GEN_VEXT_V_ENV(vfncvt_f_f_w_w) =20 /* *** Vector Reduction Operations --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647694677831502.0770751912504; Sat, 19 Mar 2022 05:57:57 -0700 (PDT) Received: from localhost ([::1]:50202 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYea-00050e-LD for importer@patchew.org; Sat, 19 Mar 2022 08:57:56 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57528) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXd-0004nJ-Ry; Sat, 19 Mar 2022 08:50:46 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:35988) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXa-0003XT-U1; Sat, 19 Mar 2022 08:50:45 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id D2DE111EF41; Sat, 19 Mar 2022 12:50:40 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Wed, 09 Mar 2022 00:34:29 -0800 Subject: [PATCH qemu 02/13] target/riscv: rvv: Rename ambiguous esz Message-ID: <164769423983.18409.14760549429989700286-2@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: 
<164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647694678961100001 From: eopXD No functional change intended in this commit. Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 76 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index e94caf1a3c..d0452a7756 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -125,9 +125,9 @@ static inline int32_t vext_lmul(uint32_t desc) /* * Get the maximum number of elements can be operated. * - * esz: log2 of element size in bytes. + * log2_esz: log2 of element size in bytes. 
*/ -static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) +static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) { /* * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. @@ -136,7 +136,7 @@ static inline uint32_t vext_max_elems(uint32_t desc, ui= nt32_t esz) uint32_t vlenb =3D simd_maxsz(desc); =20 /* Return VLMAX */ - int scale =3D vext_lmul(desc) - esz; + int scale =3D vext_lmul(desc) - log2_esz; return scale < 0 ? vlenb >> -scale : vlenb << scale; } =20 @@ -231,11 +231,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong bas= e, target_ulong stride, CPURISCVState *env, uint32_t desc, uint32_t vm, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t log2_esz, uintptr_t ra, MMUAccessType access_typ= e) { uint32_t i, k; uint32_t nf =3D vext_nf(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); =20 for (i =3D env->vstart; i < env->vl; i++, env->vstart++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -244,7 +244,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, =20 k =3D 0; while (k < nf) { - target_ulong addr =3D base + stride * i + (k << esz); + target_ulong addr =3D base + stride * i + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -289,18 +289,18 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) /* unmasked unit-stride load and store operation*/ static void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t des= c, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k; uint32_t nf =3D vext_nf(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); =20 /* load bytes from guest memory */ for (i =3D env->vstart; i < evl; i++, env->vstart++) { k =3D 
0; while (k < nf) { - target_ulong addr =3D base + ((i * nf + k) << esz); + target_ulong addr =3D base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -399,12 +399,12 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, void *vs2, CPURISCVState *env, uint32_t desc, vext_get_index_addr get_index_addr, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k; uint32_t nf =3D vext_nf(desc); uint32_t vm =3D vext_vm(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); =20 /* load bytes from guest memory */ for (i =3D env->vstart; i < env->vl; i++, env->vstart++) { @@ -414,7 +414,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, =20 k =3D 0; while (k < nf) { - abi_ptr addr =3D get_index_addr(base, i, vs2) + (k << esz); + abi_ptr addr =3D get_index_addr(base, i, vs2) + (k << log2_esz= ); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -480,13 +480,13 @@ static inline void vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra) + uint32_t log2_esz, uintptr_t ra) { void *host; uint32_t i, k, vl =3D 0; uint32_t nf =3D vext_nf(desc); uint32_t vm =3D vext_vm(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); target_ulong addr, offset, remain; =20 /* probe every access*/ @@ -494,12 +494,12 @@ vext_ldff(void *vd, void *v0, target_ulong base, if (!vm && !vext_elem_mask(v0, i)) { continue; } - addr =3D adjust_addr(env, base + i * (nf << esz)); + addr =3D adjust_addr(env, base + i * (nf << log2_esz)); if (i =3D=3D 0) { - probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD); + probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); } else { /* if it 
triggers an exception, no need to check watchpoint */ - remain =3D nf << esz; + remain =3D nf << log2_esz; while (remain > 0) { offset =3D -(addr | TARGET_PAGE_MASK); host =3D tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, @@ -536,7 +536,7 @@ ProbeSuccess: continue; } while (k < nf) { - target_ulong addr =3D base + ((i * nf + k) << esz); + target_ulong addr =3D base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -576,13 +576,13 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) */ static void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t = desc, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t= ra, MMUAccessType access_type) { uint32_t i, k, off, pos; uint32_t nf =3D vext_nf(desc); uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; - uint32_t max_elems =3D vlenb >> esz; + uint32_t max_elems =3D vlenb >> log2_esz; =20 k =3D env->vstart / max_elems; off =3D env->vstart % max_elems; @@ -590,7 +590,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVSt= ate *env, uint32_t desc, if (off) { /* load/store rest of elements of current segment pointed by vstar= t */ for (pos =3D off; pos < max_elems; pos++, env->vstart++) { - target_ulong addr =3D base + ((pos + k * max_elems) << esz); + target_ulong addr =3D base + ((pos + k * max_elems) << log2_es= z); ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd= , ra); } k++; @@ -599,7 +599,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVSt= ate *env, uint32_t desc, /* load/store elements for rest of segments */ for (; k < nf; k++) { for (i =3D 0; i < max_elems; i++, env->vstart++) { - target_ulong addr =3D base + ((i + k * max_elems) << esz); + target_ulong addr =3D base + ((i + k * max_elems) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); } } @@ -4691,11 +4691,11 @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, 
H= 2) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) =20 -#define GEN_VEXT_VSLIE1UP(ESZ, H) = \ -static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2= , \ - CPURISCVState *env, uint32_t desc) = \ +#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) = \ +static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, = \ + void *vs2, CPURISCVState *env, uint32_t desc) = \ { = \ - typedef uint##ESZ##_t ETYPE; = \ + typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ uint32_t i; = \ @@ -4718,11 +4718,11 @@ GEN_VEXT_VSLIE1UP(16, H2) GEN_VEXT_VSLIE1UP(32, H4) GEN_VEXT_VSLIE1UP(64, H8) =20 -#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=3Dx[rs1], vd[i+1] =3D vs2[i] */ @@ -4731,11 +4731,11 @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) =20 -#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) = \ -static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *v= s2, \ - CPURISCVState *env, uint32_t desc) = \ +#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) = \ +static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, = \ + void *vs2, CPURISCVState *env, uint32_t desc) = \ { = \ - typedef uint##ESZ##_t ETYPE; = \ + typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ uint32_t i; = \ @@ -4758,11 +4758,11 @@ GEN_VEXT_VSLIDE1DOWN(16, H2) GEN_VEXT_VSLIDE1DOWN(32, H4) GEN_VEXT_VSLIDE1DOWN(64, H8) =20 -#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, 
void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] =3D vs2[i+1], vd[vl-1]=3Dx[rs1]= */ @@ -4772,11 +4772,11 @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) =20 /* Vector Floating-Point Slide Instructions */ -#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=3Df[rs1], vd[i+1] =3D vs2[i] */ @@ -4784,11 +4784,11 @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) =20 -#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] =3D vs2[i+1], vd[vl-1]=3Df[rs1= ] */ --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 16476944522131007.8489673097644; Sat, 19 Mar 2022 05:54:12 -0700 (PDT) Received: from localhost ([::1]:41784 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) 
(envelope-from ) id 1nVYax-0007XF-1S for importer@patchew.org; Sat, 19 Mar 2022 08:54:11 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57532) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXd-0004ne-VB; Sat, 19 Mar 2022 08:50:46 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:35990) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXa-0003XU-Uw; Sat, 19 Mar 2022 08:50:45 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id F1ABA11EF53; Sat, 19 Mar 2022 12:50:40 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Fri, 11 Mar 2022 22:28:22 -0800 Subject: [PATCH qemu 03/13] target/riscv: rvv: Early exit when vstart >= vl MIME-Version: 1.0 Message-ID: <164769423983.18409.14760549429989700286-3@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: 
qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647694453780100001 From: eopXD According to v-spec (section 5.4): When vstart =E2=89=A5 vl, there are no body elements, and no elements are updated in any destination vector register group, including that no tail elements are updated with agnostic values. Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 275fded6e4..3ae75dc6ae 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -652,6 +652,7 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, ui= nt32_t data, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -818,6 +819,7 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1= , uint32_t rs2, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -925,6 +927,7 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1,= uint32_t vs2, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1067,6 +1070,7 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uin= t32_t data, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1216,6 +1220,7 @@ do_opivv_gvec(DisasContext *s, 
arg_rmrr *a, GVecGen3F= n *gvec_fn, } =20 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 if (a->vm && s->vl_eq_vlmax) { gvec_fn(s->sew, vreg_ofs(s, a->rd), @@ -1263,6 +1268,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, ui= nt32_t vs2, uint32_t vm, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1427,6 +1433,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, ui= nt32_t vs2, uint32_t vm, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1513,6 +1520,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr = *a, uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -1593,6 +1601,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr = *a, uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -1670,6 +1679,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ }; \ TCGLabel *over =3D gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -1851,6 +1861,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ }; \ TCGLabel *over =3D 
gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2061,6 +2072,7 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_= v *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), cpu_env, s->cfg_ptr->vlen / 8, @@ -2084,6 +2096,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_= x *a) TCGv s1; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 s1 =3D get_gpr(s, a->rs1, EXT_SIGN); =20 @@ -2139,6 +2152,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_= i *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 s1 =3D tcg_constant_i64(simm); dest =3D tcg_temp_new_ptr(); @@ -2291,6 +2305,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2321,6 +2336,7 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, ui= nt32_t vs2, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -2409,6 +2425,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, 
cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2483,6 +2500,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2604,6 +2622,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, TCGLabel *over =3D gen_new_label(); gen_set_rm(s, rm); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -2717,6 +2736,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_= v_f *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 t1 =3D tcg_temp_new_i64(); /* NaN-box f[rs1] */ @@ -2805,6 +2825,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2856,6 +2877,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -2921,6 +2943,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D 
gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2974,6 +2997,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -3061,6 +3085,7 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) = \ gen_helper_gvec_4_ptr *fn =3D gen_helper_##NAME; \ TCGLabel *over =3D gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -3164,6 +3189,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ gen_helper_gvec_3_ptr *fn =3D gen_helper_##NAME; \ TCGLabel *over =3D gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -3201,6 +3227,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_= m *a) uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -3229,6 +3256,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D 
FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -3674,6 +3702,7 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r= *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), @@ -3746,6 +3775,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, u= int8_t seq) gen_helper_gvec_3_ptr *fn; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 static gen_helper_gvec_3_ptr * const fns[6][4] =3D { { --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647695306271853.5476565995885; Sat, 19 Mar 2022 06:08:26 -0700 (PDT) Received: from localhost ([::1]:42344 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYok-00020W-M4 for importer@patchew.org; Sat, 19 Mar 2022 09:08:26 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57548) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXf-0004ni-Bs; Sat, 19 Mar 2022 08:50:48 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:35992) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXa-0003XV-W7; Sat, 19 Mar 2022 08:50:46 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 26C7011EF58; Sat, 19 Mar 2022 12:50:41 
+0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Tue, 01 Mar 2022 01:07:38 -0800 Subject: [PATCH qemu 04/13] target/riscv: rvv: Add tail agnostic for vv instructions Message-ID: <164769423983.18409.14760549429989700286-4@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647695327732100001 From: eopXD This is the first commit regarding the tail-agnostic behavior. Added the option 'rvv_ta_all_1s' to enable the behavior; it defaults to false. 
Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/cpu.c | 1 + target/riscv/cpu.h | 2 + target/riscv/cpu_helper.c | 2 + target/riscv/insn_trans/trans_rvv.c.inc | 1 + target/riscv/internals.h | 5 +- target/riscv/translate.c | 2 + target/riscv/vector_helper.c | 301 ++++++++++++++---------- 7 files changed, 183 insertions(+), 131 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index ddda4906ff..cd4cf4b41e 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] =3D { DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false), =20 DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), + DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_END_OF_LIST(), }; =20 diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index c069fe85fa..8c4a79b5a0 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -369,6 +369,7 @@ struct RISCVCPUConfig { bool ext_zhinxmin; bool ext_zve32f; bool ext_zve64f; + bool rvv_ta_all_1s; =20 /* Vendor-specific custom extensions */ bool ext_XVentanaCondOps; @@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2) /* If PointerMasking should be applied */ FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1) FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1) +FIELD(TB_FLAGS, VTA, 24, 1) =20 #ifdef TARGET_RISCV32 #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 1c60fb2e80..2941c88c31 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulon= g *pc, flags =3D FIELD_DP32(flags, TB_FLAGS, LMUL, FIELD_EX64(env->vtype, VTYPE, VLMUL)); flags =3D FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); + flags =3D FIELD_DP32(flags, TB_FLAGS, VTA, + FIELD_EX64(env->vtype, VTYPE, VTA)); } else { flags =3D FIELD_DP32(flags, TB_FLAGS, VILL, 1); } diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 3ae75dc6ae..3efac1efe0 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1231,6 +1231,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3F= n *gvec_fn, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, diff --git a/target/riscv/internals.h b/target/riscv/internals.h index dbb322bfa7..512c6c30cf 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -24,8 +24,9 @@ /* share data between vector helpers and decode code */ FIELD(VDATA, VM, 0, 1) FIELD(VDATA, LMUL, 1, 3) -FIELD(VDATA, NF, 4, 4) -FIELD(VDATA, WD, 4, 1) +FIELD(VDATA, VTA, 4, 1) +FIELD(VDATA, NF, 5, 4) +FIELD(VDATA, WD, 5, 1) =20 /* float point classify helpers */ target_ulong fclass_h(uint64_t frs1); diff --git a/target/riscv/translate.c b/target/riscv/translate.c index fac998a6b5..7775dade26 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -94,6 +94,7 @@ typedef struct DisasContext { */ int8_t lmul; uint8_t sew; + uint8_t vta; target_ulong vstart; bool vl_eq_vlmax; uint8_t ntemp; @@ -1083,6 +1084,7 @@ static void riscv_tr_init_disas_context(DisasContextB= ase *dcbase, CPUState *cs) ctx->vill =3D FIELD_EX32(tb_flags, TB_FLAGS, VILL); ctx->sew =3D FIELD_EX32(tb_flags, TB_FLAGS, SEW); ctx->lmul =3D sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); + ctx->vta =3D FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_al= l_1s; ctx->vstart =3D env->vstart; ctx->vl_eq_vlmax =3D FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); ctx->misa_mxl_max =3D env->misa_mxl_max; diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index d0452a7756..2e8a9f3578 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c 
@@ -122,6 +122,11 @@ static inline int32_t vext_lmul(uint32_t desc) return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); } =20 +static inline uint32_t vext_vta(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VTA); +} + /* * Get the maximum number of elements can be operated. * @@ -172,6 +177,32 @@ static void probe_pages(CPURISCVState *env, target_ulo= ng addr, } } =20 +static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t c= nt, + uint32_t tot) +{ + if (is_agnostic =3D=3D 0) { + /* policy undisturbed */ + return; + } + if (tot - cnt =3D=3D 0) { + return ; + } + memset(base, -1, tot - cnt); +} + +/* Set agnostic elements to 1s */ +#define GEN_SET_ELEMS_1S(SET_ELEMS_1S_FN, ETYPE, H) = \ +static void SET_ELEMS_1S_FN(void *vd, uint32_t is_agnostic, uint32_t idx, = \ + uint32_t cnt, uint32_t tot) = \ +{ = \ + ETYPE *cur =3D ((ETYPE *)vd + H(idx)); = \ + vext_set_elems_1s(cur, is_agnostic, cnt, tot); = \ +} +GEN_SET_ELEMS_1S(vext_set_elems_1s_b, int8_t, H1) +GEN_SET_ELEMS_1S(vext_set_elems_1s_h, int16_t, H2) +GEN_SET_ELEMS_1S(vext_set_elems_1s_w, int32_t, H4) +GEN_SET_ELEMS_1S(vext_set_elems_1s_d, int64_t, H8) + static inline void vext_set_elem_mask(void *v0, int index, uint8_t value) { @@ -197,6 +228,14 @@ static inline int vext_elem_mask(void *v0, int index) typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, uint32_t idx, void *vd, uintptr_t retaddr); =20 +/* set bytes to all 1s for agnostic elements */ +typedef void vext_set_elems_1s_fn(void *vd, uint32_t vta, uint32_t idx, + uint32_t cnt, uint32_t tot); +static vext_set_elems_1s_fn *vext_set_elems_1s_fns[4] =3D { + vext_set_elems_1s_b, vext_set_elems_1s_h, + vext_set_elems_1s_w, vext_set_elems_1s_d +}; + #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ static void NAME(CPURISCVState *env, abi_ptr addr, \ uint32_t idx, void *vd, uintptr_t retaddr)\ @@ -710,10 +749,12 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_S= UB) =20 static void 
do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_fn *fn) + opivv2_fn *fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); uint32_t i; =20 for (i =3D env->vstart; i < vl; i++) { @@ -723,26 +764,28 @@ static void do_vext_vv(void *vd, void *v0, void *vs1,= void *vs2, fn(vd, vs1, vs2, i); } env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME) \ +#define GEN_VEXT_VV(NAME, ETYPE) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vv(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, sizeof(ETYPE)); \ } =20 -GEN_VEXT_VV(vadd_vv_b) -GEN_VEXT_VV(vadd_vv_h) -GEN_VEXT_VV(vadd_vv_w) -GEN_VEXT_VV(vadd_vv_d) -GEN_VEXT_VV(vsub_vv_b) -GEN_VEXT_VV(vsub_vv_h) -GEN_VEXT_VV(vsub_vv_w) -GEN_VEXT_VV(vsub_vv_d) +GEN_VEXT_VV(vadd_vv_b, uint8_t) +GEN_VEXT_VV(vadd_vv_h, uint16_t) +GEN_VEXT_VV(vadd_vv_w, uint32_t) +GEN_VEXT_VV(vadd_vv_d, uint64_t) +GEN_VEXT_VV(vsub_vv_b, uint8_t) +GEN_VEXT_VV(vsub_vv_h, uint16_t) +GEN_VEXT_VV(vsub_vv_w, uint32_t) +GEN_VEXT_VV(vsub_vv_d, uint64_t) =20 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); =20 @@ -887,30 +930,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, D= O_ADD) RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) -GEN_VEXT_VV(vwaddu_vv_b) -GEN_VEXT_VV(vwaddu_vv_h) -GEN_VEXT_VV(vwaddu_vv_w) -GEN_VEXT_VV(vwsubu_vv_b) -GEN_VEXT_VV(vwsubu_vv_h) -GEN_VEXT_VV(vwsubu_vv_w) -GEN_VEXT_VV(vwadd_vv_b) -GEN_VEXT_VV(vwadd_vv_h) -GEN_VEXT_VV(vwadd_vv_w) -GEN_VEXT_VV(vwsub_vv_b) -GEN_VEXT_VV(vwsub_vv_h) 
-GEN_VEXT_VV(vwsub_vv_w) -GEN_VEXT_VV(vwaddu_wv_b) -GEN_VEXT_VV(vwaddu_wv_h) -GEN_VEXT_VV(vwaddu_wv_w) -GEN_VEXT_VV(vwsubu_wv_b) -GEN_VEXT_VV(vwsubu_wv_h) -GEN_VEXT_VV(vwsubu_wv_w) -GEN_VEXT_VV(vwadd_wv_b) -GEN_VEXT_VV(vwadd_wv_h) -GEN_VEXT_VV(vwadd_wv_w) -GEN_VEXT_VV(vwsub_wv_b) -GEN_VEXT_VV(vwsub_wv_h) -GEN_VEXT_VV(vwsub_wv_w) +GEN_VEXT_VV(vwaddu_vv_b, uint16_t) +GEN_VEXT_VV(vwaddu_vv_h, uint32_t) +GEN_VEXT_VV(vwaddu_vv_w, uint64_t) +GEN_VEXT_VV(vwsubu_vv_b, uint16_t) +GEN_VEXT_VV(vwsubu_vv_h, uint32_t) +GEN_VEXT_VV(vwsubu_vv_w, uint64_t) +GEN_VEXT_VV(vwadd_vv_b, uint16_t) +GEN_VEXT_VV(vwadd_vv_h, uint32_t) +GEN_VEXT_VV(vwadd_vv_w, uint64_t) +GEN_VEXT_VV(vwsub_vv_b, uint16_t) +GEN_VEXT_VV(vwsub_vv_h, uint32_t) +GEN_VEXT_VV(vwsub_vv_w, uint64_t) +GEN_VEXT_VV(vwaddu_wv_b, uint16_t) +GEN_VEXT_VV(vwaddu_wv_h, uint32_t) +GEN_VEXT_VV(vwaddu_wv_w, uint64_t) +GEN_VEXT_VV(vwsubu_wv_b, uint16_t) +GEN_VEXT_VV(vwsubu_wv_h, uint32_t) +GEN_VEXT_VV(vwsubu_wv_w, uint64_t) +GEN_VEXT_VV(vwadd_wv_b, uint16_t) +GEN_VEXT_VV(vwadd_wv_h, uint32_t) +GEN_VEXT_VV(vwadd_wv_w, uint64_t) +GEN_VEXT_VV(vwsub_wv_b, uint16_t) +GEN_VEXT_VV(vwsub_wv_h, uint32_t) +GEN_VEXT_VV(vwsub_wv_w, uint64_t) =20 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) @@ -1089,18 +1132,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO= _XOR) RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) -GEN_VEXT_VV(vand_vv_b) -GEN_VEXT_VV(vand_vv_h) -GEN_VEXT_VV(vand_vv_w) -GEN_VEXT_VV(vand_vv_d) -GEN_VEXT_VV(vor_vv_b) -GEN_VEXT_VV(vor_vv_h) -GEN_VEXT_VV(vor_vv_w) -GEN_VEXT_VV(vor_vv_d) -GEN_VEXT_VV(vxor_vv_b) -GEN_VEXT_VV(vxor_vv_h) -GEN_VEXT_VV(vxor_vv_w) -GEN_VEXT_VV(vxor_vv_d) +GEN_VEXT_VV(vand_vv_b, uint8_t) +GEN_VEXT_VV(vand_vv_h, uint16_t) +GEN_VEXT_VV(vand_vv_w, uint32_t) +GEN_VEXT_VV(vand_vv_d, uint64_t) +GEN_VEXT_VV(vor_vv_b, 
uint8_t) +GEN_VEXT_VV(vor_vv_h, uint16_t) +GEN_VEXT_VV(vor_vv_w, uint32_t) +GEN_VEXT_VV(vor_vv_d, uint64_t) +GEN_VEXT_VV(vxor_vv_b, uint8_t) +GEN_VEXT_VV(vxor_vv_h, uint16_t) +GEN_VEXT_VV(vxor_vv_w, uint32_t) +GEN_VEXT_VV(vxor_vv_d, uint64_t) =20 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) @@ -1346,22 +1389,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO= _MAX) RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) -GEN_VEXT_VV(vminu_vv_b) -GEN_VEXT_VV(vminu_vv_h) -GEN_VEXT_VV(vminu_vv_w) -GEN_VEXT_VV(vminu_vv_d) -GEN_VEXT_VV(vmin_vv_b) -GEN_VEXT_VV(vmin_vv_h) -GEN_VEXT_VV(vmin_vv_w) -GEN_VEXT_VV(vmin_vv_d) -GEN_VEXT_VV(vmaxu_vv_b) -GEN_VEXT_VV(vmaxu_vv_h) -GEN_VEXT_VV(vmaxu_vv_w) -GEN_VEXT_VV(vmaxu_vv_d) -GEN_VEXT_VV(vmax_vv_b) -GEN_VEXT_VV(vmax_vv_h) -GEN_VEXT_VV(vmax_vv_w) -GEN_VEXT_VV(vmax_vv_d) +GEN_VEXT_VV(vminu_vv_b, uint8_t) +GEN_VEXT_VV(vminu_vv_h, uint16_t) +GEN_VEXT_VV(vminu_vv_w, uint32_t) +GEN_VEXT_VV(vminu_vv_d, uint64_t) +GEN_VEXT_VV(vmin_vv_b, uint8_t) +GEN_VEXT_VV(vmin_vv_h, uint16_t) +GEN_VEXT_VV(vmin_vv_w, uint32_t) +GEN_VEXT_VV(vmin_vv_d, uint64_t) +GEN_VEXT_VV(vmaxu_vv_b, uint8_t) +GEN_VEXT_VV(vmaxu_vv_h, uint16_t) +GEN_VEXT_VV(vmaxu_vv_w, uint32_t) +GEN_VEXT_VV(vmaxu_vv_d, uint64_t) +GEN_VEXT_VV(vmax_vv_b, uint8_t) +GEN_VEXT_VV(vmax_vv_h, uint16_t) +GEN_VEXT_VV(vmax_vv_w, uint32_t) +GEN_VEXT_VV(vmax_vv_d, uint64_t) =20 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) @@ -1402,10 +1445,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO= _MUL) RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) -GEN_VEXT_VV(vmul_vv_b) -GEN_VEXT_VV(vmul_vv_h) -GEN_VEXT_VV(vmul_vv_w) 
-GEN_VEXT_VV(vmul_vv_d) +GEN_VEXT_VV(vmul_vv_b, uint8_t) +GEN_VEXT_VV(vmul_vv_h, uint16_t) +GEN_VEXT_VV(vmul_vv_w, uint32_t) +GEN_VEXT_VV(vmul_vv_d, uint64_t) =20 static int8_t do_mulh_b(int8_t s2, int8_t s1) { @@ -1509,18 +1552,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1,= do_mulhsu_b) RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) -GEN_VEXT_VV(vmulh_vv_b) -GEN_VEXT_VV(vmulh_vv_h) -GEN_VEXT_VV(vmulh_vv_w) -GEN_VEXT_VV(vmulh_vv_d) -GEN_VEXT_VV(vmulhu_vv_b) -GEN_VEXT_VV(vmulhu_vv_h) -GEN_VEXT_VV(vmulhu_vv_w) -GEN_VEXT_VV(vmulhu_vv_d) -GEN_VEXT_VV(vmulhsu_vv_b) -GEN_VEXT_VV(vmulhsu_vv_h) -GEN_VEXT_VV(vmulhsu_vv_w) -GEN_VEXT_VV(vmulhsu_vv_d) +GEN_VEXT_VV(vmulh_vv_b, uint8_t) +GEN_VEXT_VV(vmulh_vv_h, uint16_t) +GEN_VEXT_VV(vmulh_vv_w, uint32_t) +GEN_VEXT_VV(vmulh_vv_d, uint64_t) +GEN_VEXT_VV(vmulhu_vv_b, uint8_t) +GEN_VEXT_VV(vmulhu_vv_h, uint16_t) +GEN_VEXT_VV(vmulhu_vv_w, uint32_t) +GEN_VEXT_VV(vmulhu_vv_d, uint64_t) +GEN_VEXT_VV(vmulhsu_vv_b, uint8_t) +GEN_VEXT_VV(vmulhsu_vv_h, uint16_t) +GEN_VEXT_VV(vmulhsu_vv_w, uint32_t) +GEN_VEXT_VV(vmulhsu_vv_d, uint64_t) =20 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) @@ -1579,22 +1622,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO= _REM) RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) -GEN_VEXT_VV(vdivu_vv_b) -GEN_VEXT_VV(vdivu_vv_h) -GEN_VEXT_VV(vdivu_vv_w) -GEN_VEXT_VV(vdivu_vv_d) -GEN_VEXT_VV(vdiv_vv_b) -GEN_VEXT_VV(vdiv_vv_h) -GEN_VEXT_VV(vdiv_vv_w) -GEN_VEXT_VV(vdiv_vv_d) -GEN_VEXT_VV(vremu_vv_b) -GEN_VEXT_VV(vremu_vv_h) -GEN_VEXT_VV(vremu_vv_w) -GEN_VEXT_VV(vremu_vv_d) -GEN_VEXT_VV(vrem_vv_b) -GEN_VEXT_VV(vrem_vv_h) -GEN_VEXT_VV(vrem_vv_w) -GEN_VEXT_VV(vrem_vv_d) 
+GEN_VEXT_VV(vdivu_vv_b, uint8_t) +GEN_VEXT_VV(vdivu_vv_h, uint16_t) +GEN_VEXT_VV(vdivu_vv_w, uint32_t) +GEN_VEXT_VV(vdivu_vv_d, uint64_t) +GEN_VEXT_VV(vdiv_vv_b, uint8_t) +GEN_VEXT_VV(vdiv_vv_h, uint16_t) +GEN_VEXT_VV(vdiv_vv_w, uint32_t) +GEN_VEXT_VV(vdiv_vv_d, uint64_t) +GEN_VEXT_VV(vremu_vv_b, uint8_t) +GEN_VEXT_VV(vremu_vv_h, uint16_t) +GEN_VEXT_VV(vremu_vv_w, uint32_t) +GEN_VEXT_VV(vremu_vv_d, uint64_t) +GEN_VEXT_VV(vrem_vv_b, uint8_t) +GEN_VEXT_VV(vrem_vv_h, uint16_t) +GEN_VEXT_VV(vrem_vv_w, uint32_t) +GEN_VEXT_VV(vrem_vv_d, uint64_t) =20 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) @@ -1639,15 +1682,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4,= DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) -GEN_VEXT_VV(vwmul_vv_b) -GEN_VEXT_VV(vwmul_vv_h) -GEN_VEXT_VV(vwmul_vv_w) -GEN_VEXT_VV(vwmulu_vv_b) -GEN_VEXT_VV(vwmulu_vv_h) -GEN_VEXT_VV(vwmulu_vv_w) -GEN_VEXT_VV(vwmulsu_vv_b) -GEN_VEXT_VV(vwmulsu_vv_h) -GEN_VEXT_VV(vwmulsu_vv_w) +GEN_VEXT_VV(vwmul_vv_b, uint16_t) +GEN_VEXT_VV(vwmul_vv_h, uint32_t) +GEN_VEXT_VV(vwmul_vv_w, uint64_t) +GEN_VEXT_VV(vwmulu_vv_b, uint16_t) +GEN_VEXT_VV(vwmulu_vv_h, uint32_t) +GEN_VEXT_VV(vwmulu_vv_w, uint64_t) +GEN_VEXT_VV(vwmulsu_vv_b, uint16_t) +GEN_VEXT_VV(vwmulsu_vv_h, uint32_t) +GEN_VEXT_VV(vwmulsu_vv_w, uint64_t) =20 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) @@ -1698,22 +1741,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, = DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) -GEN_VEXT_VV(vmacc_vv_b) -GEN_VEXT_VV(vmacc_vv_h) -GEN_VEXT_VV(vmacc_vv_w) -GEN_VEXT_VV(vmacc_vv_d) -GEN_VEXT_VV(vnmsac_vv_b) 
-GEN_VEXT_VV(vnmsac_vv_h) -GEN_VEXT_VV(vnmsac_vv_w) -GEN_VEXT_VV(vnmsac_vv_d) -GEN_VEXT_VV(vmadd_vv_b) -GEN_VEXT_VV(vmadd_vv_h) -GEN_VEXT_VV(vmadd_vv_w) -GEN_VEXT_VV(vmadd_vv_d) -GEN_VEXT_VV(vnmsub_vv_b) -GEN_VEXT_VV(vnmsub_vv_h) -GEN_VEXT_VV(vnmsub_vv_w) -GEN_VEXT_VV(vnmsub_vv_d) +GEN_VEXT_VV(vmacc_vv_b, uint8_t) +GEN_VEXT_VV(vmacc_vv_h, uint16_t) +GEN_VEXT_VV(vmacc_vv_w, uint32_t) +GEN_VEXT_VV(vmacc_vv_d, uint64_t) +GEN_VEXT_VV(vnmsac_vv_b, uint8_t) +GEN_VEXT_VV(vnmsac_vv_h, uint16_t) +GEN_VEXT_VV(vnmsac_vv_w, uint32_t) +GEN_VEXT_VV(vnmsac_vv_d, uint64_t) +GEN_VEXT_VV(vmadd_vv_b, uint8_t) +GEN_VEXT_VV(vmadd_vv_h, uint16_t) +GEN_VEXT_VV(vmadd_vv_w, uint32_t) +GEN_VEXT_VV(vmadd_vv_d, uint64_t) +GEN_VEXT_VV(vnmsub_vv_b, uint8_t) +GEN_VEXT_VV(vnmsub_vv_h, uint16_t) +GEN_VEXT_VV(vnmsub_vv_w, uint32_t) +GEN_VEXT_VV(vnmsub_vv_d, uint64_t) =20 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ @@ -1766,15 +1809,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4,= DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) -GEN_VEXT_VV(vwmaccu_vv_b) -GEN_VEXT_VV(vwmaccu_vv_h) -GEN_VEXT_VV(vwmaccu_vv_w) -GEN_VEXT_VV(vwmacc_vv_b) -GEN_VEXT_VV(vwmacc_vv_h) -GEN_VEXT_VV(vwmacc_vv_w) -GEN_VEXT_VV(vwmaccsu_vv_b) -GEN_VEXT_VV(vwmaccsu_vv_h) -GEN_VEXT_VV(vwmaccsu_vv_w) +GEN_VEXT_VV(vwmaccu_vv_b, uint16_t) +GEN_VEXT_VV(vwmaccu_vv_h, uint32_t) +GEN_VEXT_VV(vwmaccu_vv_w, uint64_t) +GEN_VEXT_VV(vwmacc_vv_b, uint16_t) +GEN_VEXT_VV(vwmacc_vv_h, uint32_t) +GEN_VEXT_VV(vwmacc_vv_w, uint64_t) +GEN_VEXT_VV(vwmaccsu_vv_b, uint16_t) +GEN_VEXT_VV(vwmaccsu_vv_h, uint32_t) +GEN_VEXT_VV(vwmaccsu_vv_w, uint64_t) =20 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 
Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647694599617400.15839327089225; Sat, 19 Mar 2022 05:56:39 -0700 (PDT) Received: from localhost ([::1]:45414 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYdK-0001qb-AE for importer@patchew.org; Sat, 19 Mar 2022 08:56:38 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57606) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXg-0004of-DY; Sat, 19 Mar 2022 08:50:48 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:35994) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXd-0003YJ-UO; Sat, 19 Mar 2022 08:50:48 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 44BEA11EF5B; Sat, 19 Mar 2022 12:50:41 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Sun, 06 Mar 2022 23:32:57 -0800 Subject: [PATCH qemu 05/13] target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions Message-ID: <164769423983.18409.14760549429989700286-5@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; 
envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647694600649100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 5 + target/riscv/vector_helper.c | 311 +++++++++++++----------- 2 files changed, 178 insertions(+), 138 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 3efac1efe0..ac6379a8fc 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1278,6 +1278,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, ui= nt32_t vs2, uint32_t vm, =20 data =3D FIELD_DP32(data, VDATA, VM, vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); desc =3D tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); =20 @@ -1443,6 +1444,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, ui= nt32_t vs2, uint32_t vm, =20 data =3D FIELD_DP32(data, VDATA, VM, vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); desc =3D tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); =20 @@ -1525,6 +1527,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr = *a, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, 
s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -1606,6 +1609,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr = *a, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -1684,6 +1688,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 2e8a9f3578..56a32adcb9 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -815,10 +815,12 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) =20 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_fn fn) + opivx2_fn fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); uint32_t i; =20 for (i =3D env->vstart; i < vl; i++) { @@ -828,30 +830,32 @@ static void do_vext_vx(void *vd, void *v0, target_lon= g s1, void *vs2, fn(vd, s1, vs2, i); } env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME) \ +#define GEN_VEXT_VX(NAME, ETYPE) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vx(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ -} - -GEN_VEXT_VX(vadd_vx_b) 
-GEN_VEXT_VX(vadd_vx_h) -GEN_VEXT_VX(vadd_vx_w) -GEN_VEXT_VX(vadd_vx_d) -GEN_VEXT_VX(vsub_vx_b) -GEN_VEXT_VX(vsub_vx_h) -GEN_VEXT_VX(vsub_vx_w) -GEN_VEXT_VX(vsub_vx_d) -GEN_VEXT_VX(vrsub_vx_b) -GEN_VEXT_VX(vrsub_vx_h) -GEN_VEXT_VX(vrsub_vx_w) -GEN_VEXT_VX(vrsub_vx_d) + do_##NAME, sizeof(ETYPE)); \ +} + +GEN_VEXT_VX(vadd_vx_b, uint8_t) +GEN_VEXT_VX(vadd_vx_h, uint16_t) +GEN_VEXT_VX(vadd_vx_w, uint32_t) +GEN_VEXT_VX(vadd_vx_d, uint64_t) +GEN_VEXT_VX(vsub_vx_b, uint8_t) +GEN_VEXT_VX(vsub_vx_h, uint16_t) +GEN_VEXT_VX(vsub_vx_w, uint32_t) +GEN_VEXT_VX(vsub_vx_d, uint64_t) +GEN_VEXT_VX(vrsub_vx_b, uint8_t) +GEN_VEXT_VX(vrsub_vx_h, uint16_t) +GEN_VEXT_VX(vrsub_vx_w, uint32_t) +GEN_VEXT_VX(vrsub_vx_d, uint64_t) =20 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) { @@ -979,30 +983,30 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_AD= D) RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) -GEN_VEXT_VX(vwaddu_vx_b) -GEN_VEXT_VX(vwaddu_vx_h) -GEN_VEXT_VX(vwaddu_vx_w) -GEN_VEXT_VX(vwsubu_vx_b) -GEN_VEXT_VX(vwsubu_vx_h) -GEN_VEXT_VX(vwsubu_vx_w) -GEN_VEXT_VX(vwadd_vx_b) -GEN_VEXT_VX(vwadd_vx_h) -GEN_VEXT_VX(vwadd_vx_w) -GEN_VEXT_VX(vwsub_vx_b) -GEN_VEXT_VX(vwsub_vx_h) -GEN_VEXT_VX(vwsub_vx_w) -GEN_VEXT_VX(vwaddu_wx_b) -GEN_VEXT_VX(vwaddu_wx_h) -GEN_VEXT_VX(vwaddu_wx_w) -GEN_VEXT_VX(vwsubu_wx_b) -GEN_VEXT_VX(vwsubu_wx_h) -GEN_VEXT_VX(vwsubu_wx_w) -GEN_VEXT_VX(vwadd_wx_b) -GEN_VEXT_VX(vwadd_wx_h) -GEN_VEXT_VX(vwadd_wx_w) -GEN_VEXT_VX(vwsub_wx_b) -GEN_VEXT_VX(vwsub_wx_h) -GEN_VEXT_VX(vwsub_wx_w) +GEN_VEXT_VX(vwaddu_vx_b, uint16_t) +GEN_VEXT_VX(vwaddu_vx_h, uint32_t) +GEN_VEXT_VX(vwaddu_vx_w, uint64_t) +GEN_VEXT_VX(vwsubu_vx_b, uint16_t) +GEN_VEXT_VX(vwsubu_vx_h, uint32_t) +GEN_VEXT_VX(vwsubu_vx_w, uint64_t) +GEN_VEXT_VX(vwadd_vx_b, uint16_t) +GEN_VEXT_VX(vwadd_vx_h, uint32_t) +GEN_VEXT_VX(vwadd_vx_w, uint64_t) +GEN_VEXT_VX(vwsub_vx_b, 
uint16_t) +GEN_VEXT_VX(vwsub_vx_h, uint32_t) +GEN_VEXT_VX(vwsub_vx_w, uint64_t) +GEN_VEXT_VX(vwaddu_wx_b, uint16_t) +GEN_VEXT_VX(vwaddu_wx_h, uint32_t) +GEN_VEXT_VX(vwaddu_wx_w, uint64_t) +GEN_VEXT_VX(vwsubu_wx_b, uint16_t) +GEN_VEXT_VX(vwsubu_wx_h, uint32_t) +GEN_VEXT_VX(vwsubu_wx_w, uint64_t) +GEN_VEXT_VX(vwadd_wx_b, uint16_t) +GEN_VEXT_VX(vwadd_wx_h, uint32_t) +GEN_VEXT_VX(vwadd_wx_w, uint64_t) +GEN_VEXT_VX(vwsub_wx_b, uint16_t) +GEN_VEXT_VX(vwsub_wx_h, uint32_t) +GEN_VEXT_VX(vwsub_wx_w, uint64_t) =20 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ #define DO_VADC(N, M, C) (N + M + C) @@ -1013,6 +1017,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1023,6 +1031,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ *((ETYPE *)vd + H(i)) =3D DO_OP(s2, s1, carry); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) @@ -1040,6 +1051,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D = \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -1049,6 +1064,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ *((ETYPE *)vd + H(i)) =3D DO_OP(s2, (ETYPE)(target_long)s1, carry)= ;\ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz);\ } =20 
GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) @@ -1071,6 +1088,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ { \ uint32_t vl =3D env->vl; \ uint32_t vm =3D vext_vm(desc); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1080,6 +1100,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) @@ -1098,6 +1124,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ { \ uint32_t vl =3D env->vl; \ uint32_t vm =3D vext_vm(desc); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1107,6 +1136,12 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, \ DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) @@ -1157,18 +1192,18 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) -GEN_VEXT_VX(vand_vx_b) -GEN_VEXT_VX(vand_vx_h) -GEN_VEXT_VX(vand_vx_w) -GEN_VEXT_VX(vand_vx_d) -GEN_VEXT_VX(vor_vx_b) -GEN_VEXT_VX(vor_vx_h) -GEN_VEXT_VX(vor_vx_w) -GEN_VEXT_VX(vor_vx_d) -GEN_VEXT_VX(vxor_vx_b) -GEN_VEXT_VX(vxor_vx_h) -GEN_VEXT_VX(vxor_vx_w) -GEN_VEXT_VX(vxor_vx_d) +GEN_VEXT_VX(vand_vx_b, uint8_t) +GEN_VEXT_VX(vand_vx_h, uint16_t) +GEN_VEXT_VX(vand_vx_w, uint32_t) +GEN_VEXT_VX(vand_vx_d, 
uint64_t) +GEN_VEXT_VX(vor_vx_b, uint8_t) +GEN_VEXT_VX(vor_vx_h, uint16_t) +GEN_VEXT_VX(vor_vx_w, uint32_t) +GEN_VEXT_VX(vor_vx_d, uint64_t) +GEN_VEXT_VX(vxor_vx_b, uint8_t) +GEN_VEXT_VX(vxor_vx_h, uint16_t) +GEN_VEXT_VX(vxor_vx_w, uint32_t) +GEN_VEXT_VX(vxor_vx_d, uint64_t) =20 /* Vector Single-Width Bit Shift Instructions */ #define DO_SLL(N, M) (N << (M)) @@ -1422,22 +1457,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) -GEN_VEXT_VX(vminu_vx_b) -GEN_VEXT_VX(vminu_vx_h) -GEN_VEXT_VX(vminu_vx_w) -GEN_VEXT_VX(vminu_vx_d) -GEN_VEXT_VX(vmin_vx_b) -GEN_VEXT_VX(vmin_vx_h) -GEN_VEXT_VX(vmin_vx_w) -GEN_VEXT_VX(vmin_vx_d) -GEN_VEXT_VX(vmaxu_vx_b) -GEN_VEXT_VX(vmaxu_vx_h) -GEN_VEXT_VX(vmaxu_vx_w) -GEN_VEXT_VX(vmaxu_vx_d) -GEN_VEXT_VX(vmax_vx_b) -GEN_VEXT_VX(vmax_vx_h) -GEN_VEXT_VX(vmax_vx_w) -GEN_VEXT_VX(vmax_vx_d) +GEN_VEXT_VX(vminu_vx_b, uint8_t) +GEN_VEXT_VX(vminu_vx_h, uint16_t) +GEN_VEXT_VX(vminu_vx_w, uint32_t) +GEN_VEXT_VX(vminu_vx_d, uint64_t) +GEN_VEXT_VX(vmin_vx_b, uint8_t) +GEN_VEXT_VX(vmin_vx_h, uint16_t) +GEN_VEXT_VX(vmin_vx_w, uint32_t) +GEN_VEXT_VX(vmin_vx_d, uint64_t) +GEN_VEXT_VX(vmaxu_vx_b, uint8_t) +GEN_VEXT_VX(vmaxu_vx_h, uint16_t) +GEN_VEXT_VX(vmaxu_vx_w, uint32_t) +GEN_VEXT_VX(vmaxu_vx_d, uint64_t) +GEN_VEXT_VX(vmax_vx_b, uint8_t) +GEN_VEXT_VX(vmax_vx_h, uint16_t) +GEN_VEXT_VX(vmax_vx_w, uint32_t) +GEN_VEXT_VX(vmax_vx_d, uint64_t) =20 /* Vector Single-Width Integer Multiply Instructions */ #define DO_MUL(N, M) (N * M) @@ -1581,22 +1616,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_= mulhsu_b) RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) -GEN_VEXT_VX(vmul_vx_b) -GEN_VEXT_VX(vmul_vx_h) -GEN_VEXT_VX(vmul_vx_w) -GEN_VEXT_VX(vmul_vx_d) 
-GEN_VEXT_VX(vmulh_vx_b) -GEN_VEXT_VX(vmulh_vx_h) -GEN_VEXT_VX(vmulh_vx_w) -GEN_VEXT_VX(vmulh_vx_d) -GEN_VEXT_VX(vmulhu_vx_b) -GEN_VEXT_VX(vmulhu_vx_h) -GEN_VEXT_VX(vmulhu_vx_w) -GEN_VEXT_VX(vmulhu_vx_d) -GEN_VEXT_VX(vmulhsu_vx_b) -GEN_VEXT_VX(vmulhsu_vx_h) -GEN_VEXT_VX(vmulhsu_vx_w) -GEN_VEXT_VX(vmulhsu_vx_d) +GEN_VEXT_VX(vmul_vx_b, uint8_t) +GEN_VEXT_VX(vmul_vx_h, uint16_t) +GEN_VEXT_VX(vmul_vx_w, uint32_t) +GEN_VEXT_VX(vmul_vx_d, uint64_t) +GEN_VEXT_VX(vmulh_vx_b, uint8_t) +GEN_VEXT_VX(vmulh_vx_h, uint16_t) +GEN_VEXT_VX(vmulh_vx_w, uint32_t) +GEN_VEXT_VX(vmulh_vx_d, uint64_t) +GEN_VEXT_VX(vmulhu_vx_b, uint8_t) +GEN_VEXT_VX(vmulhu_vx_h, uint16_t) +GEN_VEXT_VX(vmulhu_vx_w, uint32_t) +GEN_VEXT_VX(vmulhu_vx_d, uint64_t) +GEN_VEXT_VX(vmulhsu_vx_b, uint8_t) +GEN_VEXT_VX(vmulhsu_vx_h, uint16_t) +GEN_VEXT_VX(vmulhsu_vx_w, uint32_t) +GEN_VEXT_VX(vmulhsu_vx_d, uint64_t) =20 /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M =3D=3D 0) ? (__typeof(N))(-1) : N / M) @@ -1655,22 +1690,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) -GEN_VEXT_VX(vdivu_vx_b) -GEN_VEXT_VX(vdivu_vx_h) -GEN_VEXT_VX(vdivu_vx_w) -GEN_VEXT_VX(vdivu_vx_d) -GEN_VEXT_VX(vdiv_vx_b) -GEN_VEXT_VX(vdiv_vx_h) -GEN_VEXT_VX(vdiv_vx_w) -GEN_VEXT_VX(vdiv_vx_d) -GEN_VEXT_VX(vremu_vx_b) -GEN_VEXT_VX(vremu_vx_h) -GEN_VEXT_VX(vremu_vx_w) -GEN_VEXT_VX(vremu_vx_d) -GEN_VEXT_VX(vrem_vx_b) -GEN_VEXT_VX(vrem_vx_h) -GEN_VEXT_VX(vrem_vx_w) -GEN_VEXT_VX(vrem_vx_d) +GEN_VEXT_VX(vdivu_vx_b, uint8_t) +GEN_VEXT_VX(vdivu_vx_h, uint16_t) +GEN_VEXT_VX(vdivu_vx_w, uint32_t) +GEN_VEXT_VX(vdivu_vx_d, uint64_t) +GEN_VEXT_VX(vdiv_vx_b, uint8_t) +GEN_VEXT_VX(vdiv_vx_h, uint16_t) +GEN_VEXT_VX(vdiv_vx_w, uint32_t) +GEN_VEXT_VX(vdiv_vx_d, uint64_t) +GEN_VEXT_VX(vremu_vx_b, uint8_t) +GEN_VEXT_VX(vremu_vx_h, uint16_t) +GEN_VEXT_VX(vremu_vx_w, uint32_t) 
+GEN_VEXT_VX(vremu_vx_d, uint64_t) +GEN_VEXT_VX(vrem_vx_b, uint8_t) +GEN_VEXT_VX(vrem_vx_h, uint16_t) +GEN_VEXT_VX(vrem_vx_w, uint32_t) +GEN_VEXT_VX(vrem_vx_d, uint64_t) =20 /* Vector Widening Integer Multiply Instructions */ RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) @@ -1701,15 +1736,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_= MUL) RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) -GEN_VEXT_VX(vwmul_vx_b) -GEN_VEXT_VX(vwmul_vx_h) -GEN_VEXT_VX(vwmul_vx_w) -GEN_VEXT_VX(vwmulu_vx_b) -GEN_VEXT_VX(vwmulu_vx_h) -GEN_VEXT_VX(vwmulu_vx_w) -GEN_VEXT_VX(vwmulsu_vx_b) -GEN_VEXT_VX(vwmulsu_vx_h) -GEN_VEXT_VX(vwmulsu_vx_w) +GEN_VEXT_VX(vwmul_vx_b, uint16_t) +GEN_VEXT_VX(vwmul_vx_h, uint32_t) +GEN_VEXT_VX(vwmul_vx_w, uint64_t) +GEN_VEXT_VX(vwmulu_vx_b, uint16_t) +GEN_VEXT_VX(vwmulu_vx_h, uint32_t) +GEN_VEXT_VX(vwmulu_vx_w, uint64_t) +GEN_VEXT_VX(vwmulsu_vx_b, uint16_t) +GEN_VEXT_VX(vwmulsu_vx_h, uint32_t) +GEN_VEXT_VX(vwmulsu_vx_w, uint64_t) =20 /* Vector Single-Width Integer Multiply-Add Instructions */ #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -1782,22 +1817,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_N= MSUB) RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) -GEN_VEXT_VX(vmacc_vx_b) -GEN_VEXT_VX(vmacc_vx_h) -GEN_VEXT_VX(vmacc_vx_w) -GEN_VEXT_VX(vmacc_vx_d) -GEN_VEXT_VX(vnmsac_vx_b) -GEN_VEXT_VX(vnmsac_vx_h) -GEN_VEXT_VX(vnmsac_vx_w) -GEN_VEXT_VX(vnmsac_vx_d) -GEN_VEXT_VX(vmadd_vx_b) -GEN_VEXT_VX(vmadd_vx_h) -GEN_VEXT_VX(vmadd_vx_w) -GEN_VEXT_VX(vmadd_vx_d) -GEN_VEXT_VX(vnmsub_vx_b) -GEN_VEXT_VX(vnmsub_vx_h) -GEN_VEXT_VX(vnmsub_vx_w) -GEN_VEXT_VX(vnmsub_vx_d) +GEN_VEXT_VX(vmacc_vx_b, uint8_t) +GEN_VEXT_VX(vmacc_vx_h, uint16_t) +GEN_VEXT_VX(vmacc_vx_w, uint32_t) 
+GEN_VEXT_VX(vmacc_vx_d, uint64_t) +GEN_VEXT_VX(vnmsac_vx_b, uint8_t) +GEN_VEXT_VX(vnmsac_vx_h, uint16_t) +GEN_VEXT_VX(vnmsac_vx_w, uint32_t) +GEN_VEXT_VX(vnmsac_vx_d, uint64_t) +GEN_VEXT_VX(vmadd_vx_b, uint8_t) +GEN_VEXT_VX(vmadd_vx_h, uint16_t) +GEN_VEXT_VX(vmadd_vx_w, uint32_t) +GEN_VEXT_VX(vmadd_vx_d, uint64_t) +GEN_VEXT_VX(vnmsub_vx_b, uint8_t) +GEN_VEXT_VX(vnmsub_vx_h, uint16_t) +GEN_VEXT_VX(vnmsub_vx_w, uint32_t) +GEN_VEXT_VX(vnmsub_vx_d, uint64_t) =20 /* Vector Widening Integer Multiply-Add Instructions */ RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) @@ -1831,18 +1866,18 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, D= O_MACC) RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) -GEN_VEXT_VX(vwmaccu_vx_b) -GEN_VEXT_VX(vwmaccu_vx_h) -GEN_VEXT_VX(vwmaccu_vx_w) -GEN_VEXT_VX(vwmacc_vx_b) -GEN_VEXT_VX(vwmacc_vx_h) -GEN_VEXT_VX(vwmacc_vx_w) -GEN_VEXT_VX(vwmaccsu_vx_b) -GEN_VEXT_VX(vwmaccsu_vx_h) -GEN_VEXT_VX(vwmaccsu_vx_w) -GEN_VEXT_VX(vwmaccus_vx_b) -GEN_VEXT_VX(vwmaccus_vx_h) -GEN_VEXT_VX(vwmaccus_vx_w) +GEN_VEXT_VX(vwmaccu_vx_b, uint16_t) +GEN_VEXT_VX(vwmaccu_vx_h, uint32_t) +GEN_VEXT_VX(vwmaccu_vx_w, uint64_t) +GEN_VEXT_VX(vwmacc_vx_b, uint16_t) +GEN_VEXT_VX(vwmacc_vx_h, uint32_t) +GEN_VEXT_VX(vwmacc_vx_w, uint64_t) +GEN_VEXT_VX(vwmaccsu_vx_b, uint16_t) +GEN_VEXT_VX(vwmaccsu_vx_h, uint32_t) +GEN_VEXT_VX(vwmaccsu_vx_w, uint64_t) +GEN_VEXT_VX(vwmaccus_vx_b, uint16_t) +GEN_VEXT_VX(vwmaccus_vx_h, uint32_t) +GEN_VEXT_VX(vwmaccus_vx_w, uint64_t) =20 /* Vector Integer Merge and Move Instructions */ #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; 
dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647694723253773.5922927379265; Sat, 19 Mar 2022 05:58:43 -0700 (PDT) Received: from localhost ([::1]:53970 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYfK-0007UO-69 for importer@patchew.org; Sat, 19 Mar 2022 08:58:42 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57594) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXf-0004oZ-Gn; Sat, 19 Mar 2022 08:50:48 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:35996) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXd-0003YK-Ry; Sat, 19 Mar 2022 08:50:47 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 615F311EF5E; Sat, 19 Mar 2022 12:50:41 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 01:38:18 -0800 Subject: [PATCH qemu 06/13] target/riscv: rvv: Add tail agnostic for vector integer shift instructions Message-ID: <164769423983.18409.14760549429989700286-6@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, 
T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647694725350100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + target/riscv/vector_helper.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index ac6379a8fc..f1838fbf11 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1871,6 +1871,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 56a32adcb9..6d79908ffe 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1216,6 +1216,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(TS1); = \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -1227,6 +1230,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ *((TS1 *)vd + HS1(i)) =3D OP(s2, s1 & MASK); = \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); 
\ } =20 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) @@ -1251,6 +1256,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(TD); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1261,6 +1270,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ *((TD *)vd + HD(i)) =3D OP(s2, s1 & MASK); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647695197436890.6613366644326; Sat, 19 Mar 2022 06:06:37 -0700 (PDT) Received: from localhost ([::1]:36104 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYmy-0006CI-Dd for importer@patchew.org; Sat, 19 Mar 2022 09:06:36 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57602) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXg-0004oc-0Z; Sat, 19 Mar 2022 08:50:48 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:35998) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXe-0003YW-Ft; Sat, 19 Mar 2022 08:50:47 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 874D211EF61; Sat, 19 Mar 
2022 12:50:41 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 01:43:53 -0800 Subject: [PATCH qemu 07/13] target/riscv: rvv: Add tail agnostic for vector integer comparison instructions Message-ID: <164769423983.18409.14760549429989700286-7@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647695198572100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 6d79908ffe..9a08d14689 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1317,6 +1317,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + 
vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1328,6 +1331,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ } \ env->vstart =3D 0; \ + /* clear tail element */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) @@ -1366,6 +1375,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1377,6 +1389,12 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, void *vs2, \ DO_OP(s2, (ETYPE)(target_long)s1)); \ } \ env->vstart =3D 0; \ + /* clear tail element */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647694502860119.84009588675258; Sat, 19 Mar 2022 05:55:02 -0700 (PDT) Received: from localhost ([::1]:42416 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYbl-0007y3-Ln for importer@patchew.org; Sat, 19 Mar 2022 08:55:01 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57604) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 
1nVYXg-0004od-8I; Sat, 19 Mar 2022 08:50:48 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36000) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXe-0003YX-G4; Sat, 19 Mar 2022 08:50:47 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id A217F11EF6A; Sat, 19 Mar 2022 12:50:41 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 01:53:23 -0800 Subject: [PATCH qemu 08/13] target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions Message-ID: <164769423983.18409.14760549429989700286-8@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647694504375100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 4 ++++ 
target/riscv/vector_helper.c | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index f1838fbf11..dec5d638c8 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2073,6 +2073,7 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_= v *a) MAXSZ(s), MAXSZ(s)); } else { uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] =3D { gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, @@ -2115,6 +2116,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_= x *a) TCGv_i64 s1_i64 =3D tcg_temp_new_i64(); TCGv_ptr dest =3D tcg_temp_new_ptr(); uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] =3D { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -2153,6 +2155,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_= i *a) TCGv_i64 s1; TCGv_ptr dest; uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] =3D { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -2736,6 +2739,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_= v_f *a) TCGv_ptr dest; TCGv_i32 desc; uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[3] =3D { gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 9a08d14689..ce96ae61dc 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1915,6 +1915,9 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState = *env, \ 
uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1922,6 +1925,9 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState = *env, \ *((ETYPE *)vd + H(i)) =3D s1; \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) @@ -1934,12 +1940,18 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVSt= ate *env, \ uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ *((ETYPE *)vd + H(i)) =3D (ETYPE)s1; \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) @@ -1952,6 +1964,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1959,6 +1974,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ *((ETYPE *)vd + H(i)) =3D *(vt + H(i)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) @@ -1971,6 +1989,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D 
vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1980,6 +2001,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ *((ETYPE *)vd + H(i)) =3D d; \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647694708961538.8292694450435; Sat, 19 Mar 2022 05:58:28 -0700 (PDT) Received: from localhost ([::1]:52786 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYf5-0006j4-I2 for importer@patchew.org; Sat, 19 Mar 2022 08:58:27 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57714) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYY2-0005KB-Dn; Sat, 19 Mar 2022 08:51:10 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36004) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXy-0003Yh-S4; Sat, 19 Mar 2022 08:51:10 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id BEE6411EF81; Sat, 19 Mar 2022 12:50:41 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 02:04:21 -0800 Subject: [PATCH qemu 09/13] target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions Message-ID: <164769423983.18409.14760549429989700286-9@git.sr.ht> X-Mailer: 
git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647694709167100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 220 ++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 106 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index ce96ae61dc..a025e4b640 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2053,10 +2053,12 @@ static inline void vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_rm_fn *fn) + opivv2_rm_fn *fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); =20 switch (env->vxrm) { case 0: /* rnu */ @@ -2076,15 +2078,17 @@ vext_vv_rm_2(void *vd, void *v0, 
void *vs1, void *v= s2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate helpers for fixed point instructions with OPIVV format */ -#define GEN_VEXT_VV_RM(NAME) \ +#define GEN_VEXT_VV_RM(NAME, ETYPE) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, sizeof(ETYPE)); \ } =20 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint= 8_t b) @@ -2134,10 +2138,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H= 1, saddu8) RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) -GEN_VEXT_VV_RM(vsaddu_vv_b) -GEN_VEXT_VV_RM(vsaddu_vv_h) -GEN_VEXT_VV_RM(vsaddu_vv_w) -GEN_VEXT_VV_RM(vsaddu_vv_d) +GEN_VEXT_VV_RM(vsaddu_vv_b, uint8_t) +GEN_VEXT_VV_RM(vsaddu_vv_h, uint16_t) +GEN_VEXT_VV_RM(vsaddu_vv_w, uint32_t) +GEN_VEXT_VV_RM(vsaddu_vv_d, uint64_t) =20 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, CPURISCVState *env, int vxrm); @@ -2170,10 +2174,12 @@ static inline void vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_rm_fn *fn) + opivx2_rm_fn *fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); =20 switch (env->vxrm) { case 0: /* rnu */ @@ -2193,25 +2199,27 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, vo= id *vs2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate helpers for fixed point instructions with OPIVX format */ -#define GEN_VEXT_VX_RM(NAME) \ +#define GEN_VEXT_VX_RM(NAME, 
ETYPE) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, sizeof(ETYPE)); \ } =20 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) -GEN_VEXT_VX_RM(vsaddu_vx_b) -GEN_VEXT_VX_RM(vsaddu_vx_h) -GEN_VEXT_VX_RM(vsaddu_vx_w) -GEN_VEXT_VX_RM(vsaddu_vx_d) +GEN_VEXT_VX_RM(vsaddu_vx_b, uint8_t) +GEN_VEXT_VX_RM(vsaddu_vx_h, uint16_t) +GEN_VEXT_VX_RM(vsaddu_vx_w, uint32_t) +GEN_VEXT_VX_RM(vsaddu_vx_d, uint64_t) =20 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t = b) { @@ -2257,19 +2265,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1= , sadd8) RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) -GEN_VEXT_VV_RM(vsadd_vv_b) -GEN_VEXT_VV_RM(vsadd_vv_h) -GEN_VEXT_VV_RM(vsadd_vv_w) -GEN_VEXT_VV_RM(vsadd_vv_d) +GEN_VEXT_VV_RM(vsadd_vv_b, uint8_t) +GEN_VEXT_VV_RM(vsadd_vv_h, uint16_t) +GEN_VEXT_VV_RM(vsadd_vv_w, uint32_t) +GEN_VEXT_VV_RM(vsadd_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) -GEN_VEXT_VX_RM(vsadd_vx_b) -GEN_VEXT_VX_RM(vsadd_vx_h) -GEN_VEXT_VX_RM(vsadd_vx_w) -GEN_VEXT_VX_RM(vsadd_vx_d) +GEN_VEXT_VX_RM(vsadd_vx_b, uint8_t) +GEN_VEXT_VX_RM(vsadd_vx_h, uint16_t) +GEN_VEXT_VX_RM(vsadd_vx_w, uint32_t) +GEN_VEXT_VX_RM(vsadd_vx_d, uint64_t) =20 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint= 8_t b) { @@ -2318,19 +2326,19 @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H= 1, 
ssubu8) RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) -GEN_VEXT_VV_RM(vssubu_vv_b) -GEN_VEXT_VV_RM(vssubu_vv_h) -GEN_VEXT_VV_RM(vssubu_vv_w) -GEN_VEXT_VV_RM(vssubu_vv_d) +GEN_VEXT_VV_RM(vssubu_vv_b, uint8_t) +GEN_VEXT_VV_RM(vssubu_vv_h, uint16_t) +GEN_VEXT_VV_RM(vssubu_vv_w, uint32_t) +GEN_VEXT_VV_RM(vssubu_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) -GEN_VEXT_VX_RM(vssubu_vx_b) -GEN_VEXT_VX_RM(vssubu_vx_h) -GEN_VEXT_VX_RM(vssubu_vx_w) -GEN_VEXT_VX_RM(vssubu_vx_d) +GEN_VEXT_VX_RM(vssubu_vx_b, uint8_t) +GEN_VEXT_VX_RM(vssubu_vx_h, uint16_t) +GEN_VEXT_VX_RM(vssubu_vx_w, uint32_t) +GEN_VEXT_VX_RM(vssubu_vx_d, uint64_t) =20 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t = b) { @@ -2376,19 +2384,19 @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1= , ssub8) RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) -GEN_VEXT_VV_RM(vssub_vv_b) -GEN_VEXT_VV_RM(vssub_vv_h) -GEN_VEXT_VV_RM(vssub_vv_w) -GEN_VEXT_VV_RM(vssub_vv_d) +GEN_VEXT_VV_RM(vssub_vv_b, uint8_t) +GEN_VEXT_VV_RM(vssub_vv_h, uint16_t) +GEN_VEXT_VV_RM(vssub_vv_w, uint32_t) +GEN_VEXT_VV_RM(vssub_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) -GEN_VEXT_VX_RM(vssub_vx_b) -GEN_VEXT_VX_RM(vssub_vx_h) -GEN_VEXT_VX_RM(vssub_vx_w) -GEN_VEXT_VX_RM(vssub_vx_d) +GEN_VEXT_VX_RM(vssub_vx_b, uint8_t) 
+GEN_VEXT_VX_RM(vssub_vx_h, uint16_t) +GEN_VEXT_VX_RM(vssub_vx_w, uint32_t) +GEN_VEXT_VX_RM(vssub_vx_d, uint64_t) =20 /* Vector Single-Width Averaging Add and Subtract */ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) @@ -2440,19 +2448,19 @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1= , aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) -GEN_VEXT_VV_RM(vaadd_vv_b) -GEN_VEXT_VV_RM(vaadd_vv_h) -GEN_VEXT_VV_RM(vaadd_vv_w) -GEN_VEXT_VV_RM(vaadd_vv_d) +GEN_VEXT_VV_RM(vaadd_vv_b, uint8_t) +GEN_VEXT_VV_RM(vaadd_vv_h, uint16_t) +GEN_VEXT_VV_RM(vaadd_vv_w, uint32_t) +GEN_VEXT_VV_RM(vaadd_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) -GEN_VEXT_VX_RM(vaadd_vx_b) -GEN_VEXT_VX_RM(vaadd_vx_h) -GEN_VEXT_VX_RM(vaadd_vx_w) -GEN_VEXT_VX_RM(vaadd_vx_d) +GEN_VEXT_VX_RM(vaadd_vx_b, uint8_t) +GEN_VEXT_VX_RM(vaadd_vx_h, uint16_t) +GEN_VEXT_VX_RM(vaadd_vx_w, uint32_t) +GEN_VEXT_VX_RM(vaadd_vx_d, uint64_t) =20 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2477,19 +2485,19 @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H= 1, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) -GEN_VEXT_VV_RM(vaaddu_vv_b) -GEN_VEXT_VV_RM(vaaddu_vv_h) -GEN_VEXT_VV_RM(vaaddu_vv_w) -GEN_VEXT_VV_RM(vaaddu_vv_d) +GEN_VEXT_VV_RM(vaaddu_vv_b, uint8_t) +GEN_VEXT_VV_RM(vaaddu_vv_h, uint16_t) +GEN_VEXT_VV_RM(vaaddu_vv_w, uint32_t) +GEN_VEXT_VV_RM(vaaddu_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) RVVCALL(OPIVX2_RM, 
vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) -GEN_VEXT_VX_RM(vaaddu_vx_b) -GEN_VEXT_VX_RM(vaaddu_vx_h) -GEN_VEXT_VX_RM(vaaddu_vx_w) -GEN_VEXT_VX_RM(vaaddu_vx_d) +GEN_VEXT_VX_RM(vaaddu_vx_b, uint8_t) +GEN_VEXT_VX_RM(vaaddu_vx_h, uint16_t) +GEN_VEXT_VX_RM(vaaddu_vx_w, uint32_t) +GEN_VEXT_VX_RM(vaaddu_vx_d, uint64_t) =20 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int3= 2_t b) { @@ -2513,19 +2521,19 @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1= , asub32) RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) -GEN_VEXT_VV_RM(vasub_vv_b) -GEN_VEXT_VV_RM(vasub_vv_h) -GEN_VEXT_VV_RM(vasub_vv_w) -GEN_VEXT_VV_RM(vasub_vv_d) +GEN_VEXT_VV_RM(vasub_vv_b, uint8_t) +GEN_VEXT_VV_RM(vasub_vv_h, uint16_t) +GEN_VEXT_VV_RM(vasub_vv_w, uint32_t) +GEN_VEXT_VV_RM(vasub_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) -GEN_VEXT_VX_RM(vasub_vx_b) -GEN_VEXT_VX_RM(vasub_vx_h) -GEN_VEXT_VX_RM(vasub_vx_w) -GEN_VEXT_VX_RM(vasub_vx_d) +GEN_VEXT_VX_RM(vasub_vx_b, uint8_t) +GEN_VEXT_VX_RM(vasub_vx_h, uint16_t) +GEN_VEXT_VX_RM(vasub_vx_w, uint32_t) +GEN_VEXT_VX_RM(vasub_vx_d, uint64_t) =20 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2550,19 +2558,19 @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H= 1, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) -GEN_VEXT_VV_RM(vasubu_vv_b) -GEN_VEXT_VV_RM(vasubu_vv_h) -GEN_VEXT_VV_RM(vasubu_vv_w) 
-GEN_VEXT_VV_RM(vasubu_vv_d) +GEN_VEXT_VV_RM(vasubu_vv_b, uint8_t) +GEN_VEXT_VV_RM(vasubu_vv_h, uint16_t) +GEN_VEXT_VV_RM(vasubu_vv_w, uint32_t) +GEN_VEXT_VV_RM(vasubu_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) -GEN_VEXT_VX_RM(vasubu_vx_b) -GEN_VEXT_VX_RM(vasubu_vx_h) -GEN_VEXT_VX_RM(vasubu_vx_w) -GEN_VEXT_VX_RM(vasubu_vx_d) +GEN_VEXT_VX_RM(vasubu_vx_b, uint8_t) +GEN_VEXT_VX_RM(vasubu_vx_h, uint16_t) +GEN_VEXT_VX_RM(vasubu_vx_w, uint32_t) +GEN_VEXT_VX_RM(vasubu_vx_d, uint64_t) =20 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t= b) @@ -2657,19 +2665,19 @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1= , vsmul8) RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) -GEN_VEXT_VV_RM(vsmul_vv_b) -GEN_VEXT_VV_RM(vsmul_vv_h) -GEN_VEXT_VV_RM(vsmul_vv_w) -GEN_VEXT_VV_RM(vsmul_vv_d) +GEN_VEXT_VV_RM(vsmul_vv_b, uint8_t) +GEN_VEXT_VV_RM(vsmul_vv_h, uint16_t) +GEN_VEXT_VV_RM(vsmul_vv_w, uint32_t) +GEN_VEXT_VV_RM(vsmul_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) -GEN_VEXT_VX_RM(vsmul_vx_b) -GEN_VEXT_VX_RM(vsmul_vx_h) -GEN_VEXT_VX_RM(vsmul_vx_w) -GEN_VEXT_VX_RM(vsmul_vx_d) +GEN_VEXT_VX_RM(vsmul_vx_b, uint8_t) +GEN_VEXT_VX_RM(vsmul_vx_h, uint16_t) +GEN_VEXT_VX_RM(vsmul_vx_w, uint32_t) +GEN_VEXT_VX_RM(vsmul_vx_d, uint64_t) =20 /* Vector Single-Width Scaling Shift Instructions */ static inline uint8_t @@ -2716,19 +2724,19 @@ 
RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1= , vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) -GEN_VEXT_VV_RM(vssrl_vv_b) -GEN_VEXT_VV_RM(vssrl_vv_h) -GEN_VEXT_VV_RM(vssrl_vv_w) -GEN_VEXT_VV_RM(vssrl_vv_d) +GEN_VEXT_VV_RM(vssrl_vv_b, uint8_t) +GEN_VEXT_VV_RM(vssrl_vv_h, uint16_t) +GEN_VEXT_VV_RM(vssrl_vv_w, uint32_t) +GEN_VEXT_VV_RM(vssrl_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) -GEN_VEXT_VX_RM(vssrl_vx_b) -GEN_VEXT_VX_RM(vssrl_vx_h) -GEN_VEXT_VX_RM(vssrl_vx_w) -GEN_VEXT_VX_RM(vssrl_vx_d) +GEN_VEXT_VX_RM(vssrl_vx_b, uint8_t) +GEN_VEXT_VX_RM(vssrl_vx_h, uint16_t) +GEN_VEXT_VX_RM(vssrl_vx_w, uint32_t) +GEN_VEXT_VX_RM(vssrl_vx_d, uint64_t) =20 static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -2775,19 +2783,19 @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1= , vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) -GEN_VEXT_VV_RM(vssra_vv_b) -GEN_VEXT_VV_RM(vssra_vv_h) -GEN_VEXT_VV_RM(vssra_vv_w) -GEN_VEXT_VV_RM(vssra_vv_d) +GEN_VEXT_VV_RM(vssra_vv_b, uint8_t) +GEN_VEXT_VV_RM(vssra_vv_h, uint16_t) +GEN_VEXT_VV_RM(vssra_vv_w, uint32_t) +GEN_VEXT_VV_RM(vssra_vv_d, uint64_t) =20 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) -GEN_VEXT_VX_RM(vssra_vx_b) -GEN_VEXT_VX_RM(vssra_vx_h) -GEN_VEXT_VX_RM(vssra_vx_w) -GEN_VEXT_VX_RM(vssra_vx_d) 
+GEN_VEXT_VX_RM(vssra_vx_b, uint8_t) +GEN_VEXT_VX_RM(vssra_vx_h, uint16_t) +GEN_VEXT_VX_RM(vssra_vx_w, uint32_t) +GEN_VEXT_VX_RM(vssra_vx_d, uint64_t) =20 /* Vector Narrowing Fixed-Point Clip Instructions */ static inline int8_t @@ -2850,16 +2858,16 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, i= nt32_t b) RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) -GEN_VEXT_VV_RM(vnclip_wv_b) -GEN_VEXT_VV_RM(vnclip_wv_h) -GEN_VEXT_VV_RM(vnclip_wv_w) +GEN_VEXT_VV_RM(vnclip_wv_b, uint8_t) +GEN_VEXT_VV_RM(vnclip_wv_h, uint16_t) +GEN_VEXT_VV_RM(vnclip_wv_w, uint32_t) =20 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) -GEN_VEXT_VX_RM(vnclip_wx_b) -GEN_VEXT_VX_RM(vnclip_wx_h) -GEN_VEXT_VX_RM(vnclip_wx_w) +GEN_VEXT_VX_RM(vnclip_wx_b, uint8_t) +GEN_VEXT_VX_RM(vnclip_wx_h, uint16_t) +GEN_VEXT_VX_RM(vnclip_wx_w, uint32_t) =20 static inline uint8_t vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) @@ -2912,16 +2920,16 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a,= uint32_t b) RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) -GEN_VEXT_VV_RM(vnclipu_wv_b) -GEN_VEXT_VV_RM(vnclipu_wv_h) -GEN_VEXT_VV_RM(vnclipu_wv_w) +GEN_VEXT_VV_RM(vnclipu_wv_b, uint8_t) +GEN_VEXT_VV_RM(vnclipu_wv_h, uint16_t) +GEN_VEXT_VV_RM(vnclipu_wv_w, uint32_t) =20 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) -GEN_VEXT_VX_RM(vnclipu_wx_b) -GEN_VEXT_VX_RM(vnclipu_wx_h) -GEN_VEXT_VX_RM(vnclipu_wx_w) +GEN_VEXT_VX_RM(vnclipu_wx_b, 
uint8_t) +GEN_VEXT_VX_RM(vnclipu_wx_h, uint16_t) +GEN_VEXT_VX_RM(vnclipu_wx_w, uint32_t) =20 /* *** Vector Float Point Arithmetic Instructions --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647695038671592.9962487801197; Sat, 19 Mar 2022 06:03:58 -0700 (PDT) Received: from localhost ([::1]:60730 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYkP-0003jr-Ff for importer@patchew.org; Sat, 19 Mar 2022 09:03:57 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57624) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXm-0004q0-QL; Sat, 19 Mar 2022 08:50:56 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36002) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXe-0003Yg-Kj; Sat, 19 Mar 2022 08:50:50 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id E655211EF8D; Sat, 19 Mar 2022 12:50:41 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 02:05:42 -0800 Subject: [PATCH qemu 10/13] target/riscv: rvv: Add tail agnostic for vector floating-point instructions Message-ID: <164769423983.18409.14760549429989700286-10@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as 
permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647695039183100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 11 + target/riscv/vector_helper.c | 450 +++++++++++++----------- 2 files changed, 265 insertions(+), 196 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index dec5d638c8..d6b17e2712 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2319,6 +2319,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2401,6 +2402,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ gen_set_rm(s, RISCV_FRM_DYN); \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], 
s); \ } \ @@ -2439,6 +2441,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2478,6 +2481,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ gen_set_rm(s, RISCV_FRM_DYN); \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2514,6 +2518,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2553,6 +2558,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ gen_set_rm(s, RISCV_FRM_DYN); \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2636,6 +2642,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -2840,6 +2847,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, 
a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -2891,6 +2899,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -2958,6 +2967,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -3011,6 +3021,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index a025e4b640..63746f3321 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2944,13 +2944,17 @@ static void do_##NAME(void *vd, void *vs1, void *vs= 2, int i, \ *((TD *)vd + HD(i)) =3D OP(s2, s1, &env->fp_status); \ } =20 -#define GEN_VEXT_VV_ENV(NAME) \ +#define GEN_VEXT_VV_ENV(NAME, ETYPE) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -2960,14 +2964,18 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ do_##NAME(vd, vs1, vs2, i, env); \ } \ env->vstart =3D 0; \ 
+ /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, \ + vl * esz, \ + vlmax * esz); \ } =20 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) -GEN_VEXT_VV_ENV(vfadd_vv_h) -GEN_VEXT_VV_ENV(vfadd_vv_w) -GEN_VEXT_VV_ENV(vfadd_vv_d) +GEN_VEXT_VV_ENV(vfadd_vv_h, float16) +GEN_VEXT_VV_ENV(vfadd_vv_w, float32) +GEN_VEXT_VV_ENV(vfadd_vv_d, float64) =20 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -2977,13 +2985,17 @@ static void do_##NAME(void *vd, uint64_t s1, void *= vs2, int i, \ *((TD *)vd + HD(i)) =3D OP(s2, (TX1)(T1)s1, &env->fp_status);\ } =20 -#define GEN_VEXT_VF(NAME) \ +#define GEN_VEXT_VF(NAME, ETYPE) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -2993,27 +3005,31 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, = \ do_##NAME(vd, s1, vs2, i, env); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, \ + vl * esz, \ + vlmax * esz); \ } =20 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) -GEN_VEXT_VF(vfadd_vf_h) -GEN_VEXT_VF(vfadd_vf_w) -GEN_VEXT_VF(vfadd_vf_d) +GEN_VEXT_VF(vfadd_vf_h, float16) +GEN_VEXT_VF(vfadd_vf_w, float32) +GEN_VEXT_VF(vfadd_vf_d, float64) =20 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, 
vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) -GEN_VEXT_VV_ENV(vfsub_vv_h) -GEN_VEXT_VV_ENV(vfsub_vv_w) -GEN_VEXT_VV_ENV(vfsub_vv_d) +GEN_VEXT_VV_ENV(vfsub_vv_h, float16) +GEN_VEXT_VV_ENV(vfsub_vv_w, float32) +GEN_VEXT_VV_ENV(vfsub_vv_d, float64) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) -GEN_VEXT_VF(vfsub_vf_h) -GEN_VEXT_VF(vfsub_vf_w) -GEN_VEXT_VF(vfsub_vf_d) +GEN_VEXT_VF(vfsub_vf_h, float16) +GEN_VEXT_VF(vfsub_vf_w, float32) +GEN_VEXT_VF(vfsub_vf_d, float64) =20 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { @@ -3033,9 +3049,9 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, = float_status *s) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) -GEN_VEXT_VF(vfrsub_vf_h) -GEN_VEXT_VF(vfrsub_vf_w) -GEN_VEXT_VF(vfrsub_vf_d) +GEN_VEXT_VF(vfrsub_vf_h, float16) +GEN_VEXT_VF(vfrsub_vf_w, float32) +GEN_VEXT_VF(vfrsub_vf_d, float64) =20 /* Vector Widening Floating-Point Add/Subtract Instructions */ static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) @@ -3053,12 +3069,12 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, fl= oat_status *s) =20 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) -GEN_VEXT_VV_ENV(vfwadd_vv_h) -GEN_VEXT_VV_ENV(vfwadd_vv_w) +GEN_VEXT_VV_ENV(vfwadd_vv_h, float32) +GEN_VEXT_VV_ENV(vfwadd_vv_w, float64) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) -GEN_VEXT_VF(vfwadd_vf_h) -GEN_VEXT_VF(vfwadd_vf_w) +GEN_VEXT_VF(vfwadd_vf_h, float32) +GEN_VEXT_VF(vfwadd_vf_w, float64) =20 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { @@ -3075,12 +3091,12 @@ static uint64_t vfwsub32(uint32_t a, 
uint32_t b, fl= oat_status *s) =20 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) -GEN_VEXT_VV_ENV(vfwsub_vv_h) -GEN_VEXT_VV_ENV(vfwsub_vv_w) +GEN_VEXT_VV_ENV(vfwsub_vv_h, float32) +GEN_VEXT_VV_ENV(vfwsub_vv_w, float64) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) -GEN_VEXT_VF(vfwsub_vf_h) -GEN_VEXT_VF(vfwsub_vf_w) +GEN_VEXT_VF(vfwsub_vf_h, float32) +GEN_VEXT_VF(vfwsub_vf_w, float64) =20 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { @@ -3094,12 +3110,12 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, f= loat_status *s) =20 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) -GEN_VEXT_VV_ENV(vfwadd_wv_h) -GEN_VEXT_VV_ENV(vfwadd_wv_w) +GEN_VEXT_VV_ENV(vfwadd_wv_h, float32) +GEN_VEXT_VV_ENV(vfwadd_wv_w, float64) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) -GEN_VEXT_VF(vfwadd_wf_h) -GEN_VEXT_VF(vfwadd_wf_w) +GEN_VEXT_VF(vfwadd_wf_h, float32) +GEN_VEXT_VF(vfwadd_wf_w, float64) =20 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { @@ -3113,39 +3129,39 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, f= loat_status *s) =20 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) -GEN_VEXT_VV_ENV(vfwsub_wv_h) -GEN_VEXT_VV_ENV(vfwsub_wv_w) +GEN_VEXT_VV_ENV(vfwsub_wv_h, float32) +GEN_VEXT_VV_ENV(vfwsub_wv_w, float64) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) -GEN_VEXT_VF(vfwsub_wf_h) -GEN_VEXT_VF(vfwsub_wf_w) +GEN_VEXT_VF(vfwsub_wf_h, float32) +GEN_VEXT_VF(vfwsub_wf_w, float64) =20 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ RVVCALL(OPFVV2, vfmul_vv_h, 
OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) -GEN_VEXT_VV_ENV(vfmul_vv_h) -GEN_VEXT_VV_ENV(vfmul_vv_w) -GEN_VEXT_VV_ENV(vfmul_vv_d) +GEN_VEXT_VV_ENV(vfmul_vv_h, float16) +GEN_VEXT_VV_ENV(vfmul_vv_w, float32) +GEN_VEXT_VV_ENV(vfmul_vv_d, float64) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) -GEN_VEXT_VF(vfmul_vf_h) -GEN_VEXT_VF(vfmul_vf_w) -GEN_VEXT_VF(vfmul_vf_d) +GEN_VEXT_VF(vfmul_vf_h, float16) +GEN_VEXT_VF(vfmul_vf_w, float32) +GEN_VEXT_VF(vfmul_vf_d, float64) =20 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) -GEN_VEXT_VV_ENV(vfdiv_vv_h) -GEN_VEXT_VV_ENV(vfdiv_vv_w) -GEN_VEXT_VV_ENV(vfdiv_vv_d) +GEN_VEXT_VV_ENV(vfdiv_vv_h, float16) +GEN_VEXT_VV_ENV(vfdiv_vv_w, float32) +GEN_VEXT_VV_ENV(vfdiv_vv_d, float64) RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) -GEN_VEXT_VF(vfdiv_vf_h) -GEN_VEXT_VF(vfdiv_vf_w) -GEN_VEXT_VF(vfdiv_vf_d) +GEN_VEXT_VF(vfdiv_vf_h, float16) +GEN_VEXT_VF(vfdiv_vf_w, float32) +GEN_VEXT_VF(vfdiv_vf_d, float64) =20 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) { @@ -3165,9 +3181,9 @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, = float_status *s) RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) -GEN_VEXT_VF(vfrdiv_vf_h) -GEN_VEXT_VF(vfrdiv_vf_w) -GEN_VEXT_VF(vfrdiv_vf_d) +GEN_VEXT_VF(vfrdiv_vf_h, float16) +GEN_VEXT_VF(vfrdiv_vf_w, float32) +GEN_VEXT_VF(vfrdiv_vf_d, float64) 
=20 /* Vector Widening Floating-Point Multiply */ static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) @@ -3184,12 +3200,12 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, fl= oat_status *s) } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) -GEN_VEXT_VV_ENV(vfwmul_vv_h) -GEN_VEXT_VV_ENV(vfwmul_vv_w) +GEN_VEXT_VV_ENV(vfwmul_vv_h, float32) +GEN_VEXT_VV_ENV(vfwmul_vv_w, float64) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) -GEN_VEXT_VF(vfwmul_vf_h) -GEN_VEXT_VF(vfwmul_vf_w) +GEN_VEXT_VF(vfwmul_vf_h, float32) +GEN_VEXT_VF(vfwmul_vf_w, float64) =20 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -3220,9 +3236,9 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint6= 4_t d, float_status *s) RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) -GEN_VEXT_VV_ENV(vfmacc_vv_h) -GEN_VEXT_VV_ENV(vfmacc_vv_w) -GEN_VEXT_VV_ENV(vfmacc_vv_d) +GEN_VEXT_VV_ENV(vfmacc_vv_h, float16) +GEN_VEXT_VV_ENV(vfmacc_vv_w, float32) +GEN_VEXT_VV_ENV(vfmacc_vv_d, float64) =20 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -3236,9 +3252,9 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs= 2, int i, \ RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) -GEN_VEXT_VF(vfmacc_vf_h) -GEN_VEXT_VF(vfmacc_vf_w) -GEN_VEXT_VF(vfmacc_vf_d) +GEN_VEXT_VF(vfmacc_vf_h, float16) +GEN_VEXT_VF(vfmacc_vf_w, float32) +GEN_VEXT_VF(vfmacc_vf_d, float64) =20 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ 
-3261,15 +3277,15 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) -GEN_VEXT_VV_ENV(vfnmacc_vv_h) -GEN_VEXT_VV_ENV(vfnmacc_vv_w) -GEN_VEXT_VV_ENV(vfnmacc_vv_d) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, float16) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, float32) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, float64) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) -GEN_VEXT_VF(vfnmacc_vf_h) -GEN_VEXT_VF(vfnmacc_vf_w) -GEN_VEXT_VF(vfnmacc_vf_d) +GEN_VEXT_VF(vfnmacc_vf_h, float16) +GEN_VEXT_VF(vfnmacc_vf_w, float32) +GEN_VEXT_VF(vfnmacc_vf_d, float64) =20 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3289,15 +3305,15 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) -GEN_VEXT_VV_ENV(vfmsac_vv_h) -GEN_VEXT_VV_ENV(vfmsac_vv_w) -GEN_VEXT_VV_ENV(vfmsac_vv_d) +GEN_VEXT_VV_ENV(vfmsac_vv_h, float16) +GEN_VEXT_VV_ENV(vfmsac_vv_w, float32) +GEN_VEXT_VV_ENV(vfmsac_vv_d, float64) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) -GEN_VEXT_VF(vfmsac_vf_h) -GEN_VEXT_VF(vfmsac_vf_w) -GEN_VEXT_VF(vfmsac_vf_d) +GEN_VEXT_VF(vfmsac_vf_h, float16) +GEN_VEXT_VF(vfmsac_vf_w, float32) +GEN_VEXT_VF(vfmsac_vf_d, float64) =20 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3317,15 +3333,15 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, 
vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) -GEN_VEXT_VV_ENV(vfnmsac_vv_h) -GEN_VEXT_VV_ENV(vfnmsac_vv_w) -GEN_VEXT_VV_ENV(vfnmsac_vv_d) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, float16) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, float32) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, float64) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) -GEN_VEXT_VF(vfnmsac_vf_h) -GEN_VEXT_VF(vfnmsac_vf_w) -GEN_VEXT_VF(vfnmsac_vf_d) +GEN_VEXT_VF(vfnmsac_vf_h, float16) +GEN_VEXT_VF(vfnmsac_vf_w, float32) +GEN_VEXT_VF(vfnmsac_vf_d, float64) =20 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3345,15 +3361,15 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) -GEN_VEXT_VV_ENV(vfmadd_vv_h) -GEN_VEXT_VV_ENV(vfmadd_vv_w) -GEN_VEXT_VV_ENV(vfmadd_vv_d) +GEN_VEXT_VV_ENV(vfmadd_vv_h, float16) +GEN_VEXT_VV_ENV(vfmadd_vv_w, float32) +GEN_VEXT_VV_ENV(vfmadd_vv_d, float64) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) -GEN_VEXT_VF(vfmadd_vf_h) -GEN_VEXT_VF(vfmadd_vf_w) -GEN_VEXT_VF(vfmadd_vf_d) +GEN_VEXT_VF(vfmadd_vf_h, float16) +GEN_VEXT_VF(vfmadd_vf_w, float32) +GEN_VEXT_VF(vfmadd_vf_d, float64) =20 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3376,15 +3392,15 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) -GEN_VEXT_VV_ENV(vfnmadd_vv_h) -GEN_VEXT_VV_ENV(vfnmadd_vv_w) -GEN_VEXT_VV_ENV(vfnmadd_vv_d) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, float16) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, float32) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, float64) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) -GEN_VEXT_VF(vfnmadd_vf_h) -GEN_VEXT_VF(vfnmadd_vf_w) -GEN_VEXT_VF(vfnmadd_vf_d) +GEN_VEXT_VF(vfnmadd_vf_h, float16) +GEN_VEXT_VF(vfnmadd_vf_w, float32) +GEN_VEXT_VF(vfnmadd_vf_d, float64) =20 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3404,15 +3420,15 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) -GEN_VEXT_VV_ENV(vfmsub_vv_h) -GEN_VEXT_VV_ENV(vfmsub_vv_w) -GEN_VEXT_VV_ENV(vfmsub_vv_d) +GEN_VEXT_VV_ENV(vfmsub_vv_h, float16) +GEN_VEXT_VV_ENV(vfmsub_vv_w, float32) +GEN_VEXT_VV_ENV(vfmsub_vv_d, float64) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) -GEN_VEXT_VF(vfmsub_vf_h) -GEN_VEXT_VF(vfmsub_vf_w) -GEN_VEXT_VF(vfmsub_vf_d) +GEN_VEXT_VF(vfmsub_vf_h, float16) +GEN_VEXT_VF(vfmsub_vf_w, float32) +GEN_VEXT_VF(vfmsub_vf_d, float64) =20 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3432,15 +3448,15 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) -GEN_VEXT_VV_ENV(vfnmsub_vv_h) 
-GEN_VEXT_VV_ENV(vfnmsub_vv_w) -GEN_VEXT_VV_ENV(vfnmsub_vv_d) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, float16) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, float32) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, float64) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) -GEN_VEXT_VF(vfnmsub_vf_h) -GEN_VEXT_VF(vfnmsub_vf_w) -GEN_VEXT_VF(vfnmsub_vf_d) +GEN_VEXT_VF(vfnmsub_vf_h, float16) +GEN_VEXT_VF(vfnmsub_vf_w, float32) +GEN_VEXT_VF(vfnmsub_vf_d, float64) =20 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status = *s) @@ -3457,12 +3473,12 @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, ui= nt64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) -GEN_VEXT_VV_ENV(vfwmacc_vv_h) -GEN_VEXT_VV_ENV(vfwmacc_vv_w) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, float32) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, float64) RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) -GEN_VEXT_VF(vfwmacc_vf_h) -GEN_VEXT_VF(vfwmacc_vf_w) +GEN_VEXT_VF(vfwmacc_vf_h, float32) +GEN_VEXT_VF(vfwmacc_vf_w, float64) =20 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status= *s) { @@ -3480,12 +3496,12 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, u= int64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) -GEN_VEXT_VV_ENV(vfwnmacc_vv_h) -GEN_VEXT_VV_ENV(vfwnmacc_vv_w) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, float32) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, float64) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) -GEN_VEXT_VF(vfwnmacc_vf_h) -GEN_VEXT_VF(vfwnmacc_vf_w) +GEN_VEXT_VF(vfwnmacc_vf_h, 
float32) +GEN_VEXT_VF(vfwnmacc_vf_w, float64) =20 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status = *s) { @@ -3503,12 +3519,12 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, ui= nt64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) -GEN_VEXT_VV_ENV(vfwmsac_vv_h) -GEN_VEXT_VV_ENV(vfwmsac_vv_w) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, float32) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, float64) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) -GEN_VEXT_VF(vfwmsac_vf_h) -GEN_VEXT_VF(vfwmsac_vf_w) +GEN_VEXT_VF(vfwmsac_vf_h, float32) +GEN_VEXT_VF(vfwmsac_vf_w, float64) =20 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status= *s) { @@ -3526,12 +3542,12 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, u= int64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) -GEN_VEXT_VV_ENV(vfwnmsac_vv_h) -GEN_VEXT_VV_ENV(vfwnmsac_vv_w) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, float32) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, float64) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) -GEN_VEXT_VF(vfwnmsac_vf_h) -GEN_VEXT_VF(vfwnmsac_vf_w) +GEN_VEXT_VF(vfwnmsac_vf_h, float32) +GEN_VEXT_VF(vfwnmsac_vf_w, float64) =20 /* Vector Floating-Point Square-Root Instruction */ /* (TD, T2, TX2) */ @@ -3547,12 +3563,16 @@ static void do_##NAME(void *vd, void *vs2, int i, = \ *((TD *)vd + HD(i)) =3D OP(s2, &env->fp_status); \ } =20 -#define GEN_VEXT_V_ENV(NAME) \ +#define GEN_VEXT_V_ENV(NAME, ETYPE) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + 
vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ if (vl =3D=3D 0) { \ @@ -3565,14 +3585,18 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ do_##NAME(vd, vs2, i, env); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, \ + vl * esz, \ + vlmax * esz); \ } =20 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) -GEN_VEXT_V_ENV(vfsqrt_v_h) -GEN_VEXT_V_ENV(vfsqrt_v_w) -GEN_VEXT_V_ENV(vfsqrt_v_d) +GEN_VEXT_V_ENV(vfsqrt_v_h, float16) +GEN_VEXT_V_ENV(vfsqrt_v_w, float32) +GEN_VEXT_V_ENV(vfsqrt_v_d, float64) =20 /* * Vector Floating-Point Reciprocal Square-Root Estimate Instruction @@ -3752,9 +3776,9 @@ static float64 frsqrt7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) -GEN_VEXT_V_ENV(vfrsqrt7_v_h) -GEN_VEXT_V_ENV(vfrsqrt7_v_w) -GEN_VEXT_V_ENV(vfrsqrt7_v_d) +GEN_VEXT_V_ENV(vfrsqrt7_v_h, float16) +GEN_VEXT_V_ENV(vfrsqrt7_v_w, float32) +GEN_VEXT_V_ENV(vfrsqrt7_v_d, float64) =20 /* * Vector Floating-Point Reciprocal Estimate Instruction @@ -3943,36 +3967,36 @@ static float64 frec7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) -GEN_VEXT_V_ENV(vfrec7_v_h) -GEN_VEXT_V_ENV(vfrec7_v_w) -GEN_VEXT_V_ENV(vfrec7_v_d) +GEN_VEXT_V_ENV(vfrec7_v_h, float16) +GEN_VEXT_V_ENV(vfrec7_v_w, float32) +GEN_VEXT_V_ENV(vfrec7_v_d, float64) =20 /* Vector Floating-Point MIN/MAX Instructions */ RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, 
vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) -GEN_VEXT_VV_ENV(vfmin_vv_h) -GEN_VEXT_VV_ENV(vfmin_vv_w) -GEN_VEXT_VV_ENV(vfmin_vv_d) +GEN_VEXT_VV_ENV(vfmin_vv_h, float16) +GEN_VEXT_VV_ENV(vfmin_vv_w, float32) +GEN_VEXT_VV_ENV(vfmin_vv_d, float64) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) -GEN_VEXT_VF(vfmin_vf_h) -GEN_VEXT_VF(vfmin_vf_w) -GEN_VEXT_VF(vfmin_vf_d) +GEN_VEXT_VF(vfmin_vf_h, float16) +GEN_VEXT_VF(vfmin_vf_w, float32) +GEN_VEXT_VF(vfmin_vf_d, float64) =20 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) -GEN_VEXT_VV_ENV(vfmax_vv_h) -GEN_VEXT_VV_ENV(vfmax_vv_w) -GEN_VEXT_VV_ENV(vfmax_vv_d) +GEN_VEXT_VV_ENV(vfmax_vv_h, float16) +GEN_VEXT_VV_ENV(vfmax_vv_w, float32) +GEN_VEXT_VV_ENV(vfmax_vv_d, float64) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) -GEN_VEXT_VF(vfmax_vf_h) -GEN_VEXT_VF(vfmax_vf_w) -GEN_VEXT_VF(vfmax_vf_d) +GEN_VEXT_VF(vfmax_vf_h, float16) +GEN_VEXT_VF(vfmax_vf_w, float32) +GEN_VEXT_VF(vfmax_vf_d, float64) =20 /* Vector Floating-Point Sign-Injection Instructions */ static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) @@ -3993,15 +4017,15 @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, flo= at_status *s) RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) -GEN_VEXT_VV_ENV(vfsgnj_vv_h) -GEN_VEXT_VV_ENV(vfsgnj_vv_w) -GEN_VEXT_VV_ENV(vfsgnj_vv_d) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, float16) 
+GEN_VEXT_VV_ENV(vfsgnj_vv_w, float32) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, float64) RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) -GEN_VEXT_VF(vfsgnj_vf_h) -GEN_VEXT_VF(vfsgnj_vf_w) -GEN_VEXT_VF(vfsgnj_vf_d) +GEN_VEXT_VF(vfsgnj_vf_h, float16) +GEN_VEXT_VF(vfsgnj_vf_w, float32) +GEN_VEXT_VF(vfsgnj_vf_d, float64) =20 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) { @@ -4021,15 +4045,15 @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, fl= oat_status *s) RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) -GEN_VEXT_VV_ENV(vfsgnjn_vv_h) -GEN_VEXT_VV_ENV(vfsgnjn_vv_w) -GEN_VEXT_VV_ENV(vfsgnjn_vv_d) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, float16) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, float32) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, float64) RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) -GEN_VEXT_VF(vfsgnjn_vf_h) -GEN_VEXT_VF(vfsgnjn_vf_w) -GEN_VEXT_VF(vfsgnjn_vf_d) +GEN_VEXT_VF(vfsgnjn_vf_h, float16) +GEN_VEXT_VF(vfsgnjn_vf_w, float32) +GEN_VEXT_VF(vfsgnjn_vf_d, float64) =20 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) { @@ -4049,15 +4073,15 @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, fl= oat_status *s) RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) -GEN_VEXT_VV_ENV(vfsgnjx_vv_h) -GEN_VEXT_VV_ENV(vfsgnjx_vv_w) -GEN_VEXT_VV_ENV(vfsgnjx_vv_d) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, float16) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, float32) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, float64) RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) RVVCALL(OPFVF2, 
vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) -GEN_VEXT_VF(vfsgnjx_vf_h) -GEN_VEXT_VF(vfsgnjx_vf_w) -GEN_VEXT_VF(vfsgnjx_vf_d) +GEN_VEXT_VF(vfsgnjx_vf_h, float16) +GEN_VEXT_VF(vfsgnjx_vf_w, float32) +GEN_VEXT_VF(vfsgnjx_vf_d, float64) =20 /* Vector Floating-Point Compare Instructions */ #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ @@ -4066,6 +4090,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4078,6 +4105,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ DO_OP(s2, s1, &env->fp_status)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) @@ -4090,6 +4123,9 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, vo= id *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4101,6 +4137,12 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, v= oid *vs2, \ DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) @@ -4201,12 +4243,16 @@ static void do_##NAME(void *vd, void *vs2, int i) = \ *((TD *)vd + HD(i)) =3D OP(s2); \ } =20 -#define GEN_VEXT_V(NAME) \ +#define GEN_VEXT_V(NAME, ETYPE) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm =3D
vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4216,6 +4262,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ do_##NAME(vd, vs2, i); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, \ + vl * esz, \ + vlmax * esz); \ } =20 target_ulong fclass_h(uint64_t frs1) @@ -4278,17 +4328,22 @@ target_ulong fclass_d(uint64_t frs1) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) -GEN_VEXT_V(vfclass_v_h) -GEN_VEXT_V(vfclass_v_w) -GEN_VEXT_V(vfclass_v_d) +GEN_VEXT_V(vfclass_v_h, float16) +GEN_VEXT_V(vfclass_v_w, float32) +GEN_VEXT_V(vfclass_v_d, float64) =20 /* Vector Floating-Point Merge Instruction */ + #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4297,6 +4352,9 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, vo= id *vs2, \ =3D (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) @@ -4308,33 +4366,33 @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, float16) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, float32) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, float64) =20 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) -GEN_VEXT_V_ENV(vfcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfcvt_x_f_v_w) -GEN_VEXT_V_ENV(vfcvt_x_f_v_d) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, float16) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, float32) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, float64) =20 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, float16) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, float32) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, float64) =20 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
*/ RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfcvt_f_x_v_w) -GEN_VEXT_V_ENV(vfcvt_f_x_v_d) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, float16) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, float32) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, float64) =20 /* Widening Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4344,30 +4402,30 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d) /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned inte= ger.*/ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, float32) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, float64) =20 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer= . */ RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, float32) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, float64) =20 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width fl= oat */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, float16) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, float32) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, float64) =20 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. 
*/ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, float16) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, float32) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, float64) =20 /* * vfwcvt.f.f.v vd, vs2, vm @@ -4380,8 +4438,8 @@ static uint32_t vfwcvtffv16(uint16_t a, float_status = *s) =20 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, float32) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, float64) =20 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4392,29 +4450,29 @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, uint8_t) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, uint16_t) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, uint32_t) =20 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer= . 
*/ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) -GEN_VEXT_V_ENV(vfncvt_x_f_w_b) -GEN_VEXT_V_ENV(vfncvt_x_f_w_h) -GEN_VEXT_V_ENV(vfncvt_x_f_w_w) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b, int8_t) +GEN_VEXT_V_ENV(vfncvt_x_f_w_h, int16_t) +GEN_VEXT_V_ENV(vfncvt_x_f_w_w, int32_t) =20 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to fl= oat */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, float16) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, float32) =20 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_x_w_h) -GEN_VEXT_V_ENV(vfncvt_f_x_w_w) +GEN_VEXT_V_ENV(vfncvt_f_x_w_h, float16) +GEN_VEXT_V_ENV(vfncvt_f_x_w_w, float32) =20 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
= */ static uint16_t vfncvtffv16(uint32_t a, float_status *s) @@ -4424,8 +4482,8 @@ static uint16_t vfncvtffv16(uint32_t a, float_status = *s) =20 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_f_w_h) -GEN_VEXT_V_ENV(vfncvt_f_f_w_w) +GEN_VEXT_V_ENV(vfncvt_f_f_w_h, float16) +GEN_VEXT_V_ENV(vfncvt_f_f_w_w, float32) =20 /* *** Vector Reduction Operations --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647695550777801.5567702268022; Sat, 19 Mar 2022 06:12:30 -0700 (PDT) Received: from localhost ([::1]:46244 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYsf-0004lN-DE for importer@patchew.org; Sat, 19 Mar 2022 09:12:29 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57684) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYY0-0005Cx-Fs; Sat, 19 Mar 2022 08:51:08 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36008) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXz-0003Z1-0g; Sat, 19 Mar 2022 08:51:08 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 11B1211EF91; Sat, 19 Mar 2022 12:50:42 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 04:21:37 -0800 Subject: [PATCH qemu 11/13] target/riscv: rvv: Add tail agnostic for vector reduction instructions Message-ID: <164769423983.18409.14760549429989700286-11@git.sr.ht> 
X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647695552747100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 63746f3321..37dc0eb9b3 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4495,6 +4495,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(TD); \ + uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ TD s1 =3D *((TD *)vs1 + HD(0)); \ \ @@ -4507,6 +4510,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ } \ *((TD *)vd + HD(0)) =3D s1; \ env->vstart =3D 0; \ + /* set tail elements to 1s */ 
\ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, \ + vlenb); \ } =20 /* vd[0] =3D sum(vs1[0], vs2[*]) */ @@ -4612,6 +4618,9 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void = *vs1, { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t esz =3D sizeof(uint32_t); + uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; + uint32_t vta =3D vext_vta(desc); uint32_t i; uint32_t s1 =3D *((uint32_t *)vs1 + H4(0)); =20 @@ -4625,6 +4634,8 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void = *vs1, } *((uint32_t *)vd + H4(0)) =3D s1; env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, vlenb); } =20 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, @@ -4632,6 +4643,9 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void = *vs1, { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t esz =3D sizeof(uint64_t); + uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; + uint32_t vta =3D vext_vta(desc); uint32_t i; uint64_t s1 =3D *((uint64_t *)vs1); =20 @@ -4645,6 +4659,8 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void = *vs1, } *((uint64_t *)vd) =3D s1; env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, vlenb); } =20 /* --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647695257135203.4576985866188; Sat, 19 Mar 2022 06:07:37 -0700 (PDT) Received: from localhost ([::1]:39316 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYnv-0008Lm-Hd for importer@patchew.org; Sat, 19 Mar 2022 09:07:35 -0400 Received: from 
eggs.gnu.org ([209.51.188.92]:57702) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYY1-0005GN-Ft; Sat, 19 Mar 2022 08:51:09 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36006) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYXz-0003Z0-0j; Sat, 19 Mar 2022 08:51:09 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 3051811EF98; Sat, 19 Mar 2022 12:50:42 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 07:26:05 -0800 Subject: [PATCH qemu 12/13] target/riscv: rvv: Add tail agnostic for vector mask instructions Message-ID: <164769423983.18409.14760549429989700286-12@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 
1647695258799100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 4 ++++ target/riscv/vector_helper.c | 27 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index d6b17e2712..e345a387b2 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3110,6 +3110,7 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) = \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -3215,6 +3216,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ cpu_env, s->cfg_ptr->vlen / 8, \ @@ -3253,6 +3255,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_= m *a) =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_3_ptr * const fns[4] =3D { gen_helper_viota_m_b, gen_helper_viota_m_h, gen_helper_viota_m_w, gen_helper_viota_m_d, @@ -3282,6 +3285,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] =3D { gen_helper_vid_v_b, gen_helper_vid_v_h, gen_helper_vid_v_w, gen_helper_vid_v_d, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 37dc0eb9b3..9cf1b0f349 100644 --- 
a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4673,6 +4673,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ int a, b; \ \ @@ -4682,6 +4685,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ vext_set_elem_mask(vd, i, OP(b, a)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 #define DO_NAND(N, M) (!(N & M)) @@ -4749,6 +4758,8 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPU= RISCVState *env, { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); int i; bool first_mask_bit =3D false; =20 @@ -4777,6 +4788,12 @@ static void vmsetm(void *vd, void *v0, void *vs2, CP= URISCVState *env, } } env->vstart =3D 0; + /* set tail elements to 1s */ + if (vta) { + for (; i < vlmax; i++) { + vext_set_elem_mask(vd, i, 1); + } + } } =20 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, @@ -4804,6 +4821,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPUR= ISCVState *env, \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t sum =3D 0; = \ int i; \ \ @@ -4817,6 +4837,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPUR= ISCVState *env, \ } \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) @@ -4830,6 +4852,9 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *= env, uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t
vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ int i; \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -4839,6 +4864,8 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *= env, uint32_t desc) \ *((ETYPE *)vd + H(i)) =3D i; = \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) --=20 2.34.1 From nobody Sun Feb 8 20:57:58 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1647695160560248.5535366753022; Sat, 19 Mar 2022 06:06:00 -0700 (PDT) Received: from localhost ([::1]:34258 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nVYmN-0004wt-9r for importer@patchew.org; Sat, 19 Mar 2022 09:05:59 -0400 Received: from eggs.gnu.org ([209.51.188.92]:57716) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYY2-0005KE-E7; Sat, 19 Mar 2022 08:51:10 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36010) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nVYY0-0003ZW-I3; Sat, 19 Mar 2022 08:51:10 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 5926B11EFC5; Sat, 19 Mar 2022 12:50:42 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 07:59:26 -0800 Subject: [PATCH qemu 13/13] target/riscv: rvv: Add tail agnostic for vector permutation instructions 
Message-ID: <164769423983.18409.14760549429989700286-13@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164769423983.18409.14760549429989700286-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: Frank Chang , Alistair Francis , Bin Meng , Palmer Dabbelt , eop Chen Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1647695162115100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 2 ++ target/riscv/vector_helper.c | 39 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index e345a387b2..8168e3db41 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3731,6 +3731,7 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r= *a) tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -3836,6 +3837,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, u= int8_t seq) } =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); =20 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 9cf1b0f349..f2601f5b6b 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4884,6 +4884,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ target_ulong offset =3D s1, i_min, i; = \ \ i_min =3D MAX(env->vstart, offset); = \ @@ -4893,6 +4896,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ } \ *((ETYPE *)vd + H(i)) =3D *((ETYPE *)vs2 + H(i - offset)); = \ } \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] =3D vs2[i] */ @@ -4908,6 +4913,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ uint32_t vlmax =3D vext_max_elems(desc, ctzl(sizeof(ETYPE))); = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vta =3D vext_vta(desc); = \ target_ulong i_max, i; \ \ i_max =3D MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); = \ @@ -4924,6 +4931,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ } \ \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] =3D vs2[i+rs1] */ @@ -4939,6 +4948,9 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, = target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; = \ = \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -4952,6 +4964,8 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, = target_ulong s1, \ } = \ } = \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ = \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); = \ } =20 GEN_VEXT_VSLIE1UP(8, H1) @@ -4979,6 +4993,9 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0= , target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; = \ = \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -4992,6 +5009,8 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0= , target_ulong s1, \ } = \ } = \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ = \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); = \ } =20 GEN_VEXT_VSLIDE1DOWN(8, H1) @@ -5045,6 +5064,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint32_t vlmax =3D vext_max_elems(desc, ctzl(sizeof(TS2))); = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(TS2); = \ + uint32_t vta =3D vext_vta(desc); = \ 
uint64_t index; \ uint32_t i; \ \ @@ -5060,6 +5081,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ } \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vd[i] =3D (vs1[i] >=3D VLMAX) ? 0 : vs2[vs1[i]]; */ @@ -5080,6 +5103,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ uint32_t vlmax =3D vext_max_elems(desc, ctzl(sizeof(ETYPE))); = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vta =3D vext_vta(desc); = \ uint64_t index =3D s1; = \ uint32_t i; \ \ @@ -5094,6 +5119,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ } \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vd[i] =3D (x[rs1] >=3D VLMAX) ? 0 : vs2[rs1] */ @@ -5108,6 +5135,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_vlmax(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t num =3D 0, i; = \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -5118,6 +5148,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ num++; \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* Compress into vd elements of vs2 where vs1 is enabled */ @@ -5154,6 +5186,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ { \ uint32_t vl =3D env->vl; \ uint32_t vm =3D vext_vm(desc); \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_vlmax(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -5163,6 +5199,9 @@ void 
HELPER(NAME)(void *vd, void *v0, void *vs2, = \ *((ETYPE *)vd + HD(i)) =3D *((DTYPE *)vs2 + HS1(i)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) --=20 2.34.1