From nobody Thu Apr 3 11:52:28 2025 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1741574304471986.0475997557453; Sun, 9 Mar 2025 19:38:24 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1trT1c-0003xI-18; Sun, 09 Mar 2025 22:37:52 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1trT1X-0003eI-KE; Sun, 09 Mar 2025 22:37:47 -0400 Received: from out28-97.mail.aliyun.com ([115.124.28.97]) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1trT1T-0003Qq-Hs; Sun, 09 Mar 2025 22:37:47 -0400 Received: from TC-DZ-03-0020.tc.local(mailfrom:lc00631@tecorigin.com fp:SMTPD_---.booCGce_1741574249 cluster:ay29) by smtp.aliyun-inc.com; Mon, 10 Mar 2025 10:37:32 +0800 From: Chao Liu To: alistair23@gmail.com Cc: alistair.francis@wdc.com, bmeng.cn@gmail.com, dbarboza@ventanamicro.com, lc00631@tecorigin.com, liwei1518@gmail.com, palmer@dabbelt.com, qemu-devel@nongnu.org, qemu-riscv@nongnu.org, zhiwei_liu@linux.alibaba.com, zqz00548@tecorigin.com Subject: [PATCH v4 1/2] target/riscv: refactor VSTART_CHECK_EARLY_EXIT() to accept vl as a parameter Date: Mon, 10 Mar 2025 10:35:24 +0800 Message-ID: X-Mailer: git-send-email 2.47.0.windows.2 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=115.124.28.97; envelope-from=lc00631@tecorigin.com; helo=out28-97.mail.aliyun.com X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, RCVD_IN_DNSWL_NONE=-0.0001, RCVD_IN_MSPIKE_H3=0.001, RCVD_IN_MSPIKE_WL=0.001, RCVD_IN_VALIDITY_RPBL_BLOCKED=0.001, RCVD_IN_VALIDITY_SAFE_BLOCKED=0.001, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, UNPARSEABLE_RELAY=0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer=patchew.org@nongnu.org X-ZM-MESSAGEID: 1741574307262019100 Content-Type: text/plain; charset="utf-8" Some vector instructions are special, such as the vlm.v instruction, where setting its vl actually sets evl =3D (vl + 7) >> 3. To improve maintainability, we will uniformly use VSTART_CHECK_EARLY_EXIT() to check for the condition vstart >=3D vl. This function will also handle cases involving evl. Fixes: df4252b2ec ("target/riscv/vector_helpers: do early exit when vstart >=3D vl") Signed-off-by: Chao Liu Reviewed-by: Daniel Henrique Barboza --- target/riscv/vcrypto_helper.c | 32 +++++++-------- target/riscv/vector_helper.c | 69 ++++++++++++++++----------------- target/riscv/vector_internals.c | 4 +- target/riscv/vector_internals.h | 12 +++--- 4 files changed, 57 insertions(+), 60 deletions(-) diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index f7423df..1526de9 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -222,7 +222,7 @@ static inline void xor_round_key(AESState *round_state,= AESState *round_key) uint32_t total_elems =3D vext_get_total_elems(env, desc, 4); = \ uint32_t vta =3D vext_vta(desc); = \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { = \ AESState round_key; \ @@ -248,7 +248,7 @@ static inline void xor_round_key(AESState *round_state,= AESState *round_key) uint32_t total_elems =3D vext_get_total_elems(env, desc, 4); = \ uint32_t vta =3D vext_vta(desc); = \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { = \ AESState round_key; \ @@ -309,7 +309,7 @@ void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, = uint32_t uimm, uint32_t total_elems =3D vext_get_total_elems(env, desc, 4); uint32_t vta =3D vext_vta(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); =20 uimm &=3D 0b1111; if (uimm > 10 || uimm =3D=3D 0) { @@ -357,7 +357,7 @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, = uint32_t uimm, uint32_t total_elems =3D vext_get_total_elems(env, desc, 4); uint32_t vta =3D vext_vta(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); =20 uimm &=3D 0b1111; if (uimm > 14 || uimm < 2) { @@ -465,7 +465,7 @@ void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2,= CPURISCVState *env, uint32_t total_elems; uint32_t vta =3D vext_vta(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { if (sew =3D=3D MO_32) { @@ -582,7 +582,7 @@ void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs= 2, CPURISCVState *env, uint32_t total_elems; uint32_t vta =3D vext_vta(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, @@ -602,7 +602,7 @@ void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs= 2, CPURISCVState *env, uint32_t total_elems; uint32_t vta =3D vext_vta(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, @@ -622,7 +622,7 @@ void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs= 2, CPURISCVState *env, uint32_t total_elems; uint32_t vta =3D vext_vta(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, @@ -642,7 +642,7 @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs= 2, CPURISCVState *env, uint32_t total_elems; uint32_t vta =3D vext_vta(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, @@ -676,7 +676,7 @@ void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, v= oid *vs2_vptr, uint32_t *vs1 =3D vs1_vptr; uint32_t *vs2 =3D vs2_vptr; =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (int i =3D env->vstart / 8; i < env->vl / 8; i++) { uint32_t w[24]; @@ -777,7 +777,7 @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, ui= nt32_t uimm, uint32_t *vs2 =3D vs2_vptr; uint32_t v1[8], v2[8], v3[8]; =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (int i =3D env->vstart / 8; i < env->vl / 8; i++) { for (int k =3D 0; k < 8; k++) { @@ -802,7 +802,7 @@ void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, vo= id *vs2_vptr, uint32_t vta =3D vext_vta(desc); uint32_t total_elems =3D vext_get_total_elems(env, desc, 4); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { uint64_t Y[2] =3D {vd[i * 2 + 0], vd[i * 2 + 1]}; @@ -841,7 +841,7 @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CP= URISCVState *env, uint32_t vta =3D vext_vta(desc); uint32_t total_elems =3D vext_get_total_elems(env, desc, 4); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D env->vstart / 4; i < env->vl / 4; i++) { uint64_t Y[2] =3D {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])}; @@ -879,7 +879,7 @@ void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uim= m5, CPURISCVState *env, uint32_t esz =3D sizeof(uint32_t); uint32_t total_elems =3D vext_get_total_elems(env, desc, esz); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D group_start; i < group_end; ++i) { uint32_t vstart =3D i * egs; @@ -937,7 +937,7 @@ void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVStat= e *env, uint32_t desc) uint32_t esz =3D sizeof(uint32_t); uint32_t total_elems =3D vext_get_total_elems(env, desc, esz); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D group_start; i < group_end; ++i) { uint32_t vstart =3D i * egs; @@ -973,7 +973,7 @@ void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVStat= e *env, uint32_t desc) uint32_t esz =3D sizeof(uint32_t); uint32_t total_elems =3D vext_get_total_elems(env, desc, esz); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (uint32_t i =3D group_start; i < group_end; ++i) { uint32_t vstart =3D i * egs; diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 71b823d..217d2f6 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -260,7 +260,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,= target_ulong stride, uint32_t esz =3D 1 << log2_esz; uint32_t vma =3D vext_vma(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 for (i =3D env->vstart; i < env->vl; env->vstart =3D ++i) { k =3D 0; @@ -383,10 +383,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVStat= e *env, uint32_t desc, uint32_t msize =3D nf * esz; int mmu_index =3D riscv_env_mmu_index(env, false); =20 - if (env->vstart >=3D evl) { - env->vstart =3D 0; - return; - } + VSTART_CHECK_EARLY_EXIT(env, evl); =20 #if defined(CONFIG_USER_ONLY) /* @@ -544,7 +541,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t esz =3D 1 << log2_esz; uint32_t vma =3D vext_vma(desc); =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 /* load bytes from guest memory */ for (i =3D env->vstart; i < env->vl; env->vstart =3D ++i) { @@ -638,7 +635,7 @@ vext_ldff(void *vd, void *v0, target_ulong base, CPURIS= CVState *env, int flags; void *host; =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); =20 addr =3D base + ((env->vstart * nf) << log2_esz); page_split =3D -(addr | TARGET_PAGE_MASK); @@ -1116,7 +1113,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s1 =3D *((ETYPE *)vs1 + H(i)); \ @@ -1150,7 +1147,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ uint32_t vta =3D vext_vta(desc); = \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { = \ ETYPE s2 =3D *((ETYPE *)vs2 + H(i)); = \ @@ -1187,7 +1184,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint32_t vta_all_1s =3D vext_vta_all_1s(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s1 =3D *((ETYPE *)vs1 + H(i)); \ @@ -1227,7 +1224,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ uint32_t vta_all_1s =3D vext_vta_all_1s(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s2 =3D *((ETYPE *)vs2 + H(i)); \ @@ -1325,7 +1322,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ uint32_t vma =3D vext_vma(desc); = \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -1374,7 +1371,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -1438,7 +1435,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s1 =3D *((ETYPE *)vs1 + H(i)); \ @@ -1505,7 +1502,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s2 =3D *((ETYPE *)vs2 + H(i)); \ @@ -2054,7 +2051,7 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState = *env, \ uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s1 =3D *((ETYPE *)vs1 + H(i)); \ @@ -2080,7 +2077,7 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVStat= e *env, \ uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ *((ETYPE *)vd + H(i)) =3D (ETYPE)s1; \ @@ -2105,7 +2102,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE *vt =3D (!vext_elem_mask(v0, i) ? vs2 : vs1); \ @@ -2131,7 +2128,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s2 =3D *((ETYPE *)vs2 + H(i)); \ @@ -2178,7 +2175,7 @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, uint32_t vl, uint32_t vm, int vxrm, opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); =20 for (uint32_t i =3D env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -2305,7 +2302,7 @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void= *vs2, uint32_t vl, uint32_t vm, int vxrm, opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); =20 for (uint32_t i =3D env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -3104,7 +3101,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -3149,7 +3146,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, = \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -3737,7 +3734,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ if (vl =3D=3D 0) { \ return; \ @@ -4260,7 +4257,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s1 =3D *((ETYPE *)vs1 + H(i)); \ @@ -4302,7 +4299,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, vo= id *vs2, \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s2 =3D *((ETYPE *)vs2 + H(i)); \ @@ -4497,7 +4494,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, vo= id *vs2, \ uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ ETYPE s2 =3D *((ETYPE *)vs2 + H(i)); \ @@ -4827,7 +4824,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ uint32_t i; \ int a, b; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ a =3D vext_elem_mask(vs1, i); \ @@ -5022,7 +5019,7 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *= env, uint32_t desc) \ uint32_t vma =3D vext_vma(desc); = \ int i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5059,7 +5056,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ uint32_t vma =3D vext_vma(desc); = \ target_ulong offset =3D s1, i_min, i; = \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ i_min =3D MAX(env->vstart, offset); = \ for (i =3D i_min; i < vl; i++) { = \ @@ -5094,7 +5091,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ uint32_t vma =3D vext_vma(desc); = \ target_ulong i_max, i_min, i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ i_min =3D MIN(s1 < vlmax ? vlmax - s1 : 0, vl); = \ i_max =3D MAX(i_min, env->vstart); = \ @@ -5138,7 +5135,7 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, = uint64_t s1, \ uint32_t vma =3D vext_vma(desc); = \ uint32_t i; = \ = \ - VSTART_CHECK_EARLY_EXIT(env); = \ + VSTART_CHECK_EARLY_EXIT(env, vl); = \ = \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vm && !vext_elem_mask(v0, i)) { = \ @@ -5189,7 +5186,7 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0= , uint64_t s1, \ uint32_t vma =3D vext_vma(desc); = \ uint32_t i; = \ = \ - VSTART_CHECK_EARLY_EXIT(env); = \ + VSTART_CHECK_EARLY_EXIT(env, vl); = \ = \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vm && !vext_elem_mask(v0, i)) { = \ @@ -5266,7 +5263,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint64_t index; \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5311,7 +5308,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ uint64_t index =3D s1; = \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5407,7 +5404,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internal= s.c index 05b2d01..b490b1d 100644 --- a/target/riscv/vector_internals.c +++ b/target/riscv/vector_internals.c @@ -66,7 +66,7 @@ void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, uint32_t vma =3D vext_vma(desc); uint32_t i; =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); =20 for (i =3D env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -92,7 +92,7 @@ void do_vext_vx(void *vd, void *v0, target_long s1, void = *vs2, uint32_t vma =3D vext_vma(desc); uint32_t i; =20 - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); =20 for (i =3D env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internal= s.h index a11cc83..8eee7e5 100644 --- a/target/riscv/vector_internals.h +++ b/target/riscv/vector_internals.h @@ -25,11 +25,11 @@ #include "tcg/tcg-gvec-desc.h" #include "internals.h" =20 -#define VSTART_CHECK_EARLY_EXIT(env) do { \ - if (env->vstart >=3D env->vl) { \ - env->vstart =3D 0; \ - return; \ - } \ +#define VSTART_CHECK_EARLY_EXIT(env, vl) do { \ + if (env->vstart >=3D vl) { \ + env->vstart =3D 0; \ + return; \ + } \ } while (0) =20 static inline uint32_t vext_nf(uint32_t desc) @@ -159,7 +159,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ uint32_t vma =3D vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i =3D env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ --=20 2.48.1 From nobody Thu Apr 3 11:52:28 2025 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1741574303739609.1898561885802; Sun, 9 Mar 2025 19:38:23 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1trT1Y-0003eX-KI; Sun, 09 Mar 2025 22:37:48 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1trT1W-0003db-5f; Sun, 09 Mar 2025 22:37:46 -0400 Received: from out28-149.mail.aliyun.com ([115.124.28.149]) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1trT1T-0003Qo-FV; Sun, 09 Mar 2025 22:37:45 -0400 Received: from TC-DZ-03-0020.tc.local(mailfrom:lc00631@tecorigin.com fp:SMTPD_---.booCGfo_1741574252 cluster:ay29) by smtp.aliyun-inc.com; Mon, 10 Mar 2025 10:37:32 +0800 From: Chao Liu To: alistair23@gmail.com Cc: alistair.francis@wdc.com, bmeng.cn@gmail.com, dbarboza@ventanamicro.com, lc00631@tecorigin.com, liwei1518@gmail.com, palmer@dabbelt.com, qemu-devel@nongnu.org, qemu-riscv@nongnu.org, zhiwei_liu@linux.alibaba.com, zqz00548@tecorigin.com Subject: [PATCH v4 2/2] target/riscv: fix handling of nop for vstart >= vl in some vector instruction Date: Mon, 10 Mar 2025 10:35:25 +0800 Message-ID: X-Mailer: git-send-email 2.47.0.windows.2 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=115.124.28.149; envelope-from=lc00631@tecorigin.com; helo=out28-149.mail.aliyun.com X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, RCVD_IN_DNSWL_NONE=-0.0001, RCVD_IN_MSPIKE_H2=0.001, RCVD_IN_VALIDITY_RPBL_BLOCKED=0.001, RCVD_IN_VALIDITY_SAFE_BLOCKED=0.001, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, UNPARSEABLE_RELAY=0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer=patchew.org@nongnu.org X-ZM-MESSAGEID: 1741574305444019000 Content-Type: text/plain; charset="utf-8" Recently, when I was writing a RISCV test, I found that when VL is set to 0= , the instruction should be nop, but when I tested it, I found that QEMU will tre= at all elements as tail elements, and in the case of VTA=3D1, write all elemen= ts to 1. After troubleshooting, it was found that the vext_vx_rm_1 function was call= ed in the vext_vx_rm_2, and then the vext_set_elems_1s function was called to pro= cess the tail element, but only VSTART >=3D vl was checked in the vext_vx_rm_1 function, which caused the tail element to still be processed even if it was returned in advance. So I've made the following change: Put VSTART_CHECK_EARLY_EXIT(env) at the beginning of the vext_vx_rm_2 funct= ion, so that the VSTART register is checked correctly. Fixes: df4252b2ec ("target/riscv/vector_helpers: do early exit when vstart >=3D vl") Signed-off-by: Chao Liu Reviewed-by: Daniel Henrique Barboza --- target/riscv/vector_helper.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 217d2f6..67b3baf 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2175,8 +2175,6 @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, uint32_t vl, uint32_t vm, int vxrm, opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env, vl); - for (uint32_t i =3D env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { /* set masked-off elements to 1s */ @@ -2200,6 +2198,8 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, uint32_t vta =3D vext_vta(desc); uint32_t vma =3D vext_vma(desc); =20 + VSTART_CHECK_EARLY_EXIT(env, vl); + switch (env->vxrm) { case 0: /* rnu */ vext_vv_rm_1(vd, v0, vs1, vs2, @@ -2302,8 +2302,6 @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void= *vs2, uint32_t vl, uint32_t vm, int vxrm, opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env, vl); - for (uint32_t i =3D env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { /* set masked-off elements to 1s */ @@ -2327,6 +2325,8 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void= *vs2, uint32_t vta =3D vext_vta(desc); uint32_t vma =3D vext_vma(desc); =20 + VSTART_CHECK_EARLY_EXIT(env, vl); + switch (env->vxrm) { case 0: /* rnu */ vext_vx_rm_1(vd, v0, s1, vs2, @@ -4662,6 +4662,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ uint32_t i; \ TD s1 =3D *((TD *)vs1 + HD(0)); \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i =3D env->vstart; i < vl; i++) { \ TS2 s2 =3D *((TS2 *)vs2 + HS2(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -4750,6 +4752,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ uint32_t i; \ TD s1 =3D *((TD *)vs1 + HD(0)); \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i =3D env->vstart; i < vl; i++) { \ TS2 s2 =3D *((TS2 *)vs2 + HS2(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -4914,6 +4918,8 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPU= RISCVState *env, int i; bool first_mask_bit =3D false; =20 + VSTART_CHECK_EARLY_EXIT(env, vl); + for (i =3D env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { /* set masked-off elements to 1s */ @@ -4986,6 +4992,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPUR= ISCVState *env, \ uint32_t sum =3D 0; = \ int i; \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vm && !vext_elem_mask(v0, i)) { \ /* set masked-off elements to 1s */ \ @@ -5344,6 +5352,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ uint32_t vta =3D vext_vta(desc); = \ uint32_t num =3D 0, i; = \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i =3D env->vstart; i < vl; i++) { = \ if (!vext_elem_mask(vs1, i)) { \ continue; \ --=20 2.48.1