From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005071028908.724059906765; Tue, 22 Mar 2022 20:11:11 -0700 (PDT) Received: from localhost ([::1]:52882 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrOx-0005NZ-EV for importer@patchew.org; Tue, 22 Mar 2022 23:11:11 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48696) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMo-0002Yy-0K; Tue, 22 Mar 2022 23:08:58 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36404) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMk-00038L-Tw; Tue, 22 Mar 2022 23:08:57 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 972A811EF83; Wed, 23 Mar 2022 03:08:51 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Wed, 09 Mar 2022 00:34:29 -0800 Subject: [PATCH qemu v2 01/13] target/riscv: rvv: Rename ambiguous esz Message-ID: <164800493107.31817.14053311036718644936-1@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005073383100003 From: eopXD No functional change intended in this commit. Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 76 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index e94caf1a3c..d0452a7756 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -125,9 +125,9 @@ static inline int32_t vext_lmul(uint32_t desc) /* * Get the maximum number of elements can be operated. * - * esz: log2 of element size in bytes. + * log2_esz: log2 of element size in bytes. */ -static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) +static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) { /* * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. @@ -136,7 +136,7 @@ static inline uint32_t vext_max_elems(uint32_t desc, ui= nt32_t esz) uint32_t vlenb =3D simd_maxsz(desc); =20 /* Return VLMAX */ - int scale =3D vext_lmul(desc) - esz; + int scale =3D vext_lmul(desc) - log2_esz; return scale < 0 ? vlenb >> -scale : vlenb << scale; } =20 @@ -231,11 +231,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong bas= e, target_ulong stride, CPURISCVState *env, uint32_t desc, uint32_t vm, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t log2_esz, uintptr_t ra, MMUAccessType access_typ= e) { uint32_t i, k; uint32_t nf =3D vext_nf(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); =20 for (i =3D env->vstart; i < env->vl; i++, env->vstart++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -244,7 +244,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, =20 k =3D 0; while (k < nf) { - target_ulong addr =3D base + stride * i + (k << esz); + target_ulong addr =3D base + stride * i + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -289,18 +289,18 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) /* unmasked unit-stride load and store operation*/ static void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t des= c, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k; uint32_t nf =3D vext_nf(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); =20 /* load bytes from guest memory */ for (i =3D env->vstart; i < evl; i++, env->vstart++) { k =3D 0; while (k < nf) { - target_ulong addr =3D base + ((i * nf + k) << esz); + target_ulong addr =3D base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -399,12 +399,12 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, void *vs2, CPURISCVState *env, uint32_t desc, vext_get_index_addr get_index_addr, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type) { uint32_t i, k; uint32_t nf =3D vext_nf(desc); uint32_t vm =3D vext_vm(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); =20 /* load bytes from guest memory */ for (i =3D env->vstart; i < env->vl; i++, env->vstart++) { @@ -414,7 +414,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, =20 k =3D 0; while (k < nf) { - abi_ptr addr =3D get_index_addr(base, i, vs2) + (k << esz); + abi_ptr addr =3D get_index_addr(base, i, vs2) + (k << log2_esz= ); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -480,13 +480,13 @@ static inline void vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra) + uint32_t log2_esz, uintptr_t ra) { void *host; uint32_t i, k, vl =3D 0; uint32_t nf =3D vext_nf(desc); uint32_t vm =3D vext_vm(desc); - uint32_t max_elems =3D vext_max_elems(desc, esz); + uint32_t max_elems =3D vext_max_elems(desc, log2_esz); target_ulong addr, offset, remain; =20 /* probe every access*/ @@ -494,12 +494,12 @@ vext_ldff(void *vd, void *v0, target_ulong base, if (!vm && !vext_elem_mask(v0, i)) { continue; } - addr =3D adjust_addr(env, base + i * (nf << esz)); + addr =3D adjust_addr(env, base + i * (nf << log2_esz)); if (i =3D=3D 0) { - probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD); + probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); } else { /* if it triggers an exception, no need to check watchpoint */ - remain =3D nf << esz; + remain =3D nf << log2_esz; while (remain > 0) { offset =3D -(addr | TARGET_PAGE_MASK); host =3D tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, @@ -536,7 +536,7 @@ ProbeSuccess: continue; } while (k < nf) { - target_ulong addr =3D base + ((i * nf + k) << esz); + target_ulong addr =3D base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); k++; } @@ -576,13 +576,13 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) */ static void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t = desc, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t= ra, MMUAccessType access_type) { uint32_t i, k, off, pos; uint32_t nf =3D vext_nf(desc); uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; - uint32_t max_elems =3D vlenb >> esz; + uint32_t max_elems =3D vlenb >> log2_esz; =20 k =3D env->vstart / max_elems; off =3D env->vstart % max_elems; @@ -590,7 +590,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVSt= ate *env, uint32_t desc, if (off) { /* load/store rest of elements of current segment pointed by vstar= t */ for (pos =3D off; pos < max_elems; pos++, env->vstart++) { - target_ulong addr =3D base + ((pos + k * max_elems) << esz); + target_ulong addr =3D base + ((pos + k * max_elems) << log2_es= z); ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd= , ra); } k++; @@ -599,7 +599,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVSt= ate *env, uint32_t desc, /* load/store elements for rest of segments */ for (; k < nf; k++) { for (i =3D 0; i < max_elems; i++, env->vstart++) { - target_ulong addr =3D base + ((i + k * max_elems) << esz); + target_ulong addr =3D base + ((i + k * max_elems) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, = ra); } } @@ -4691,11 +4691,11 @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H= 2) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) =20 -#define GEN_VEXT_VSLIE1UP(ESZ, H) = \ -static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2= , \ - CPURISCVState *env, uint32_t desc) = \ +#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) = \ +static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, = \ + void *vs2, CPURISCVState *env, uint32_t desc) = \ { = \ - typedef uint##ESZ##_t ETYPE; = \ + typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ uint32_t i; = \ @@ -4718,11 +4718,11 @@ GEN_VEXT_VSLIE1UP(16, H2) GEN_VEXT_VSLIE1UP(32, H4) GEN_VEXT_VSLIE1UP(64, H8) =20 -#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=3Dx[rs1], vd[i+1] =3D vs2[i] */ @@ -4731,11 +4731,11 @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) =20 -#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) = \ -static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *v= s2, \ - CPURISCVState *env, uint32_t desc) = \ +#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) = \ +static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, = \ + void *vs2, CPURISCVState *env, uint32_t desc) = \ { = \ - typedef uint##ESZ##_t ETYPE; = \ + typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ uint32_t i; = \ @@ -4758,11 +4758,11 @@ GEN_VEXT_VSLIDE1DOWN(16, H2) GEN_VEXT_VSLIDE1DOWN(32, H4) GEN_VEXT_VSLIDE1DOWN(64, H8) =20 -#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] =3D vs2[i+1], vd[vl-1]=3Dx[rs1]= */ @@ -4772,11 +4772,11 @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) =20 /* Vector Floating-Point Slide Instructions */ -#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=3Df[rs1], vd[i+1] =3D vs2[i] */ @@ -4784,11 +4784,11 @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) =20 -#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } =20 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] =3D vs2[i+1], vd[vl-1]=3Df[rs1= ] */ --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005434546749.8770560558044; Tue, 22 Mar 2022 20:17:14 -0700 (PDT) Received: from localhost ([::1]:40160 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrUp-0007TO-65 for importer@patchew.org; Tue, 22 Mar 2022 23:17:15 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48702) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMo-0002ZF-61; Tue, 22 Mar 2022 23:08:58 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36406) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMl-00038M-6x; Tue, 22 Mar 2022 23:08:57 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id B9DEA11EF86; Wed, 23 Mar 2022 03:08:51 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Fri, 11 Mar 2022 22:28:22 -0800 Subject: [PATCH qemu v2 02/13] target/riscv: rvv: Early exit when vstart >= vl MIME-Version: 1.0 Message-ID: <164800493107.31817.14053311036718644936-2@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005445635100001 From: eopXD According to v-spec (section 5.4): When vstart =E2=89=A5 vl, there are no body elements, and no elements are updated in any destination vector register group, including that no tail elements are updated with agnostic values. Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 275fded6e4..1e51a3e79c 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -652,6 +652,7 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, ui= nt32_t data, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -818,6 +819,7 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1= , uint32_t rs2, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -925,6 +927,7 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1,= uint32_t vs2, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1067,6 +1070,7 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uin= t32_t data, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1216,6 +1220,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3F= n *gvec_fn, } =20 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 if (a->vm && s->vl_eq_vlmax) { gvec_fn(s->sew, vreg_ofs(s, a->rd), @@ -1263,6 +1268,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, ui= nt32_t vs2, uint32_t vm, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1427,6 +1433,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, ui= nt32_t vs2, uint32_t vm, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -1513,6 +1520,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr = *a, uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -1593,6 +1601,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr = *a, uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -1670,6 +1679,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ }; \ TCGLabel *over =3D gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -1851,6 +1861,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ }; \ TCGLabel *over =3D gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2061,6 +2072,7 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_= v *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), cpu_env, s->cfg_ptr->vlen / 8, @@ -2084,6 +2096,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_= x *a) TCGv s1; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 s1 =3D get_gpr(s, a->rs1, EXT_SIGN); =20 @@ -2139,6 +2152,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_= i *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 s1 =3D tcg_constant_i64(simm); dest =3D tcg_temp_new_ptr(); @@ -2291,6 +2305,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2321,6 +2336,7 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, ui= nt32_t vs2, =20 TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 dest =3D tcg_temp_new_ptr(); mask =3D tcg_temp_new_ptr(); @@ -2409,6 +2425,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2483,6 +2500,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2604,6 +2622,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, TCGLabel *over =3D gen_new_label(); gen_set_rm(s, rm); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -2717,6 +2736,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_= v_f *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 t1 =3D tcg_temp_new_i64(); /* NaN-box f[rs1] */ @@ -2805,6 +2825,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2856,6 +2877,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -2921,6 +2943,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -2974,6 +2997,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ TCGLabel *over =3D gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -3061,6 +3085,7 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) = \ gen_helper_gvec_4_ptr *fn =3D gen_helper_##NAME; \ TCGLabel *over =3D gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -3201,6 +3226,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_= m *a) uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -3229,6 +3255,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) uint32_t data =3D 0; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -3674,6 +3701,7 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r= *a) }; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), @@ -3746,6 +3774,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, u= int8_t seq) gen_helper_gvec_3_ptr *fn; TCGLabel *over =3D gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 static gen_helper_gvec_3_ptr * const fns[6][4] =3D { { --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 164800527378716.841647182652764; Tue, 22 Mar 2022 20:14:33 -0700 (PDT) Received: from localhost ([::1]:32862 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrSB-0002W2-TZ for importer@patchew.org; Tue, 22 Mar 2022 23:14:31 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48660) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMn-0002YY-7O; Tue, 22 Mar 2022 23:08:57 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36408) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMk-00038N-OU; Tue, 22 Mar 2022 23:08:56 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id D8FB311EF8F; Wed, 23 Mar 2022 03:08:51 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Tue, 01 Mar 2022 01:07:38 -0800 Subject: [PATCH qemu v2 03/13] target/riscv: rvv: Add tail agnostic for vv instructions Message-ID: <164800493107.31817.14053311036718644936-3@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005274581100001 From: eopXD This is the first commit regarding the tail agnostic behavior. Added option 'rvv_ta_all_1s' to enable the behavior, the option is default to false. Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/cpu.c | 1 + target/riscv/cpu.h | 2 + target/riscv/cpu_helper.c | 2 + target/riscv/insn_trans/trans_rvv.c.inc | 1 + target/riscv/internals.h | 5 +- target/riscv/translate.c | 2 + target/riscv/vector_helper.c | 315 ++++++++++++++---------- 7 files changed, 197 insertions(+), 131 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index ddda4906ff..cd4cf4b41e 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] =3D { DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false), =20 DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), + DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_END_OF_LIST(), }; =20 diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index c069fe85fa..8c4a79b5a0 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -369,6 +369,7 @@ struct RISCVCPUConfig { bool ext_zhinxmin; bool ext_zve32f; bool ext_zve64f; + bool rvv_ta_all_1s; =20 /* Vendor-specific custom extensions */ bool ext_XVentanaCondOps; @@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2) /* If PointerMasking should be applied */ FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1) FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1) +FIELD(TB_FLAGS, VTA, 24, 1) =20 #ifdef TARGET_RISCV32 #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 1c60fb2e80..2941c88c31 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulon= g *pc, flags =3D FIELD_DP32(flags, TB_FLAGS, LMUL, FIELD_EX64(env->vtype, VTYPE, VLMUL)); flags =3D FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); + flags =3D FIELD_DP32(flags, TB_FLAGS, VTA, + FIELD_EX64(env->vtype, VTYPE, VTA)); } else { flags =3D FIELD_DP32(flags, TB_FLAGS, VILL, 1); } diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 1e51a3e79c..603abe0e9f 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1231,6 +1231,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3F= n *gvec_fn, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, diff --git a/target/riscv/internals.h b/target/riscv/internals.h index dbb322bfa7..512c6c30cf 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -24,8 +24,9 @@ /* share data between vector helpers and decode code */ FIELD(VDATA, VM, 0, 1) FIELD(VDATA, LMUL, 1, 3) -FIELD(VDATA, NF, 4, 4) -FIELD(VDATA, WD, 4, 1) +FIELD(VDATA, VTA, 4, 1) +FIELD(VDATA, NF, 5, 4) +FIELD(VDATA, WD, 5, 1) =20 /* float point classify helpers */ target_ulong fclass_h(uint64_t frs1); diff --git a/target/riscv/translate.c b/target/riscv/translate.c index fac998a6b5..7775dade26 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -94,6 +94,7 @@ typedef struct DisasContext { */ int8_t lmul; uint8_t sew; + uint8_t vta; target_ulong vstart; bool vl_eq_vlmax; uint8_t ntemp; @@ -1083,6 +1084,7 @@ static void riscv_tr_init_disas_context(DisasContextB= ase *dcbase, CPUState *cs) ctx->vill =3D FIELD_EX32(tb_flags, TB_FLAGS, VILL); ctx->sew =3D FIELD_EX32(tb_flags, TB_FLAGS, SEW); ctx->lmul =3D sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); + ctx->vta =3D FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_al= l_1s; ctx->vstart =3D env->vstart; ctx->vl_eq_vlmax =3D FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); ctx->misa_mxl_max =3D env->misa_mxl_max; diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index d0452a7756..6c47d39251 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -122,6 +122,11 @@ static inline int32_t vext_lmul(uint32_t desc) return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); } =20 +static inline uint32_t vext_vta(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VTA); +} + /* * Get the maximum number of elements can be operated. * @@ -140,6 +145,20 @@ static inline uint32_t vext_max_elems(uint32_t desc, u= int32_t log2_esz) return scale < 0 ? vlenb >> -scale : vlenb << scale; } =20 +/* + * Get number of total elements, including prestart, body and tail element= s. + * Note that when LMUL < 1, the tail includes the elements past VLMAX that + * are held in the same vector register. + */ +static inline uint32_t vext_get_total_elem(RISCVCPU *cpu, target_ulong vty= pe) +{ + uint8_t sew =3D FIELD_EX64(vtype, VTYPE, VSEW); + int8_t lmul =3D sextract32(FIELD_EX64(vtype, VTYPE, VLMUL), 0, 3) < 0 ? + 0 : sextract32(FIELD_EX64(vtype, VTYPE, VLMUL), 0, 3); + return cpu->cfg.vlen >> (sew + 3 - lmul); +} + + static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong ad= dr) { return (addr & env->cur_pmmask) | env->cur_pmbase; @@ -172,6 +191,32 @@ static void probe_pages(CPURISCVState *env, target_ulo= ng addr, } } =20 +static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t c= nt, + uint32_t tot) +{ + if (is_agnostic =3D=3D 0) { + /* policy undisturbed */ + return; + } + if (tot - cnt =3D=3D 0) { + return ; + } + memset(base, -1, tot - cnt); +} + +/* Set agnostic elements to 1s */ +#define GEN_SET_ELEMS_1S(SET_ELEMS_1S_FN, ETYPE, H) = \ +static void SET_ELEMS_1S_FN(void *vd, uint32_t is_agnostic, uint32_t idx, = \ + uint32_t cnt, uint32_t tot) = \ +{ = \ + ETYPE *cur =3D ((ETYPE *)vd + H(idx)); = \ + vext_set_elems_1s(cur, is_agnostic, cnt, tot); = \ +} +GEN_SET_ELEMS_1S(vext_set_elems_1s_b, int8_t, H1) +GEN_SET_ELEMS_1S(vext_set_elems_1s_h, int16_t, H2) +GEN_SET_ELEMS_1S(vext_set_elems_1s_w, int32_t, H4) +GEN_SET_ELEMS_1S(vext_set_elems_1s_d, int64_t, H8) + static inline void vext_set_elem_mask(void *v0, int index, uint8_t value) { @@ -197,6 +242,14 @@ static inline int vext_elem_mask(void *v0, int index) typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, uint32_t idx, void *vd, uintptr_t retaddr); =20 +/* set bytes to all 1s for agnostic elements */ +typedef void vext_set_elems_1s_fn(void *vd, uint32_t vta, uint32_t idx, + uint32_t cnt, uint32_t tot); +static vext_set_elems_1s_fn *vext_set_elems_1s_fns[4] =3D { + vext_set_elems_1s_b, vext_set_elems_1s_h, + vext_set_elems_1s_w, vext_set_elems_1s_d +}; + #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ static void NAME(CPURISCVState *env, abi_ptr addr, \ uint32_t idx, void *vd, uintptr_t retaddr)\ @@ -710,10 +763,12 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_S= UB) =20 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_fn *fn) + opivv2_fn *fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); uint32_t i; =20 for (i =3D env->vstart; i < vl; i++) { @@ -723,26 +778,28 @@ static void do_vext_vv(void *vd, void *v0, void *vs1,= void *vs2, fn(vd, vs1, vs2, i); } env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME) \ +#define GEN_VEXT_VV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vv(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } =20 -GEN_VEXT_VV(vadd_vv_b) -GEN_VEXT_VV(vadd_vv_h) -GEN_VEXT_VV(vadd_vv_w) -GEN_VEXT_VV(vadd_vv_d) -GEN_VEXT_VV(vsub_vv_b) -GEN_VEXT_VV(vsub_vv_h) -GEN_VEXT_VV(vsub_vv_w) -GEN_VEXT_VV(vsub_vv_d) +GEN_VEXT_VV(vadd_vv_b, 1) +GEN_VEXT_VV(vadd_vv_h, 2) +GEN_VEXT_VV(vadd_vv_w, 4) +GEN_VEXT_VV(vadd_vv_d, 8) +GEN_VEXT_VV(vsub_vv_b, 1) +GEN_VEXT_VV(vsub_vv_h, 2) +GEN_VEXT_VV(vsub_vv_w, 4) +GEN_VEXT_VV(vsub_vv_d, 8) =20 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); =20 @@ -887,30 +944,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, D= O_ADD) RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) -GEN_VEXT_VV(vwaddu_vv_b) -GEN_VEXT_VV(vwaddu_vv_h) -GEN_VEXT_VV(vwaddu_vv_w) -GEN_VEXT_VV(vwsubu_vv_b) -GEN_VEXT_VV(vwsubu_vv_h) -GEN_VEXT_VV(vwsubu_vv_w) -GEN_VEXT_VV(vwadd_vv_b) -GEN_VEXT_VV(vwadd_vv_h) -GEN_VEXT_VV(vwadd_vv_w) -GEN_VEXT_VV(vwsub_vv_b) -GEN_VEXT_VV(vwsub_vv_h) -GEN_VEXT_VV(vwsub_vv_w) -GEN_VEXT_VV(vwaddu_wv_b) -GEN_VEXT_VV(vwaddu_wv_h) -GEN_VEXT_VV(vwaddu_wv_w) -GEN_VEXT_VV(vwsubu_wv_b) -GEN_VEXT_VV(vwsubu_wv_h) -GEN_VEXT_VV(vwsubu_wv_w) -GEN_VEXT_VV(vwadd_wv_b) -GEN_VEXT_VV(vwadd_wv_h) -GEN_VEXT_VV(vwadd_wv_w) -GEN_VEXT_VV(vwsub_wv_b) -GEN_VEXT_VV(vwsub_wv_h) -GEN_VEXT_VV(vwsub_wv_w) +GEN_VEXT_VV(vwaddu_vv_b, 2) +GEN_VEXT_VV(vwaddu_vv_h, 4) +GEN_VEXT_VV(vwaddu_vv_w, 8) +GEN_VEXT_VV(vwsubu_vv_b, 2) +GEN_VEXT_VV(vwsubu_vv_h, 4) +GEN_VEXT_VV(vwsubu_vv_w, 8) +GEN_VEXT_VV(vwadd_vv_b, 2) +GEN_VEXT_VV(vwadd_vv_h, 4) +GEN_VEXT_VV(vwadd_vv_w, 8) +GEN_VEXT_VV(vwsub_vv_b, 2) +GEN_VEXT_VV(vwsub_vv_h, 4) +GEN_VEXT_VV(vwsub_vv_w, 8) +GEN_VEXT_VV(vwaddu_wv_b, 2) +GEN_VEXT_VV(vwaddu_wv_h, 4) +GEN_VEXT_VV(vwaddu_wv_w, 8) +GEN_VEXT_VV(vwsubu_wv_b, 2) +GEN_VEXT_VV(vwsubu_wv_h, 4) +GEN_VEXT_VV(vwsubu_wv_w, 8) +GEN_VEXT_VV(vwadd_wv_b, 2) +GEN_VEXT_VV(vwadd_wv_h, 4) +GEN_VEXT_VV(vwadd_wv_w, 8) +GEN_VEXT_VV(vwsub_wv_b, 2) +GEN_VEXT_VV(vwsub_wv_h, 4) +GEN_VEXT_VV(vwsub_wv_w, 8) =20 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) @@ -1089,18 +1146,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO= _XOR) RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) -GEN_VEXT_VV(vand_vv_b) -GEN_VEXT_VV(vand_vv_h) -GEN_VEXT_VV(vand_vv_w) -GEN_VEXT_VV(vand_vv_d) -GEN_VEXT_VV(vor_vv_b) -GEN_VEXT_VV(vor_vv_h) -GEN_VEXT_VV(vor_vv_w) -GEN_VEXT_VV(vor_vv_d) -GEN_VEXT_VV(vxor_vv_b) -GEN_VEXT_VV(vxor_vv_h) -GEN_VEXT_VV(vxor_vv_w) -GEN_VEXT_VV(vxor_vv_d) +GEN_VEXT_VV(vand_vv_b, 1) +GEN_VEXT_VV(vand_vv_h, 2) +GEN_VEXT_VV(vand_vv_w, 4) +GEN_VEXT_VV(vand_vv_d, 8) +GEN_VEXT_VV(vor_vv_b, 1) +GEN_VEXT_VV(vor_vv_h, 2) +GEN_VEXT_VV(vor_vv_w, 4) +GEN_VEXT_VV(vor_vv_d, 8) +GEN_VEXT_VV(vxor_vv_b, 1) +GEN_VEXT_VV(vxor_vv_h, 2) +GEN_VEXT_VV(vxor_vv_w, 4) +GEN_VEXT_VV(vxor_vv_d, 8) =20 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) @@ -1346,22 +1403,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO= _MAX) RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) -GEN_VEXT_VV(vminu_vv_b) -GEN_VEXT_VV(vminu_vv_h) -GEN_VEXT_VV(vminu_vv_w) -GEN_VEXT_VV(vminu_vv_d) -GEN_VEXT_VV(vmin_vv_b) -GEN_VEXT_VV(vmin_vv_h) -GEN_VEXT_VV(vmin_vv_w) -GEN_VEXT_VV(vmin_vv_d) -GEN_VEXT_VV(vmaxu_vv_b) -GEN_VEXT_VV(vmaxu_vv_h) -GEN_VEXT_VV(vmaxu_vv_w) -GEN_VEXT_VV(vmaxu_vv_d) -GEN_VEXT_VV(vmax_vv_b) -GEN_VEXT_VV(vmax_vv_h) -GEN_VEXT_VV(vmax_vv_w) -GEN_VEXT_VV(vmax_vv_d) +GEN_VEXT_VV(vminu_vv_b, 1) +GEN_VEXT_VV(vminu_vv_h, 2) +GEN_VEXT_VV(vminu_vv_w, 4) +GEN_VEXT_VV(vminu_vv_d, 8) +GEN_VEXT_VV(vmin_vv_b, 1) +GEN_VEXT_VV(vmin_vv_h, 2) +GEN_VEXT_VV(vmin_vv_w, 4) +GEN_VEXT_VV(vmin_vv_d, 8) +GEN_VEXT_VV(vmaxu_vv_b, 1) +GEN_VEXT_VV(vmaxu_vv_h, 2) +GEN_VEXT_VV(vmaxu_vv_w, 4) +GEN_VEXT_VV(vmaxu_vv_d, 8) +GEN_VEXT_VV(vmax_vv_b, 1) +GEN_VEXT_VV(vmax_vv_h, 2) +GEN_VEXT_VV(vmax_vv_w, 4) +GEN_VEXT_VV(vmax_vv_d, 8) =20 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) @@ -1402,10 +1459,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO= _MUL) RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) -GEN_VEXT_VV(vmul_vv_b) -GEN_VEXT_VV(vmul_vv_h) -GEN_VEXT_VV(vmul_vv_w) -GEN_VEXT_VV(vmul_vv_d) +GEN_VEXT_VV(vmul_vv_b, 1) +GEN_VEXT_VV(vmul_vv_h, 2) +GEN_VEXT_VV(vmul_vv_w, 4) +GEN_VEXT_VV(vmul_vv_d, 8) =20 static int8_t do_mulh_b(int8_t s2, int8_t s1) { @@ -1509,18 +1566,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1,= do_mulhsu_b) RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) -GEN_VEXT_VV(vmulh_vv_b) -GEN_VEXT_VV(vmulh_vv_h) -GEN_VEXT_VV(vmulh_vv_w) -GEN_VEXT_VV(vmulh_vv_d) -GEN_VEXT_VV(vmulhu_vv_b) -GEN_VEXT_VV(vmulhu_vv_h) -GEN_VEXT_VV(vmulhu_vv_w) -GEN_VEXT_VV(vmulhu_vv_d) -GEN_VEXT_VV(vmulhsu_vv_b) -GEN_VEXT_VV(vmulhsu_vv_h) -GEN_VEXT_VV(vmulhsu_vv_w) -GEN_VEXT_VV(vmulhsu_vv_d) +GEN_VEXT_VV(vmulh_vv_b, 1) +GEN_VEXT_VV(vmulh_vv_h, 2) +GEN_VEXT_VV(vmulh_vv_w, 4) +GEN_VEXT_VV(vmulh_vv_d, 8) +GEN_VEXT_VV(vmulhu_vv_b, 1) +GEN_VEXT_VV(vmulhu_vv_h, 2) +GEN_VEXT_VV(vmulhu_vv_w, 4) +GEN_VEXT_VV(vmulhu_vv_d, 8) +GEN_VEXT_VV(vmulhsu_vv_b, 1) +GEN_VEXT_VV(vmulhsu_vv_h, 2) +GEN_VEXT_VV(vmulhsu_vv_w, 4) +GEN_VEXT_VV(vmulhsu_vv_d, 8) =20 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) @@ -1579,22 +1636,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO= _REM) RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) -GEN_VEXT_VV(vdivu_vv_b) -GEN_VEXT_VV(vdivu_vv_h) -GEN_VEXT_VV(vdivu_vv_w) -GEN_VEXT_VV(vdivu_vv_d) -GEN_VEXT_VV(vdiv_vv_b) -GEN_VEXT_VV(vdiv_vv_h) -GEN_VEXT_VV(vdiv_vv_w) -GEN_VEXT_VV(vdiv_vv_d) -GEN_VEXT_VV(vremu_vv_b) -GEN_VEXT_VV(vremu_vv_h) -GEN_VEXT_VV(vremu_vv_w) -GEN_VEXT_VV(vremu_vv_d) -GEN_VEXT_VV(vrem_vv_b) -GEN_VEXT_VV(vrem_vv_h) -GEN_VEXT_VV(vrem_vv_w) -GEN_VEXT_VV(vrem_vv_d) +GEN_VEXT_VV(vdivu_vv_b, 1) +GEN_VEXT_VV(vdivu_vv_h, 2) +GEN_VEXT_VV(vdivu_vv_w, 4) +GEN_VEXT_VV(vdivu_vv_d, 8) +GEN_VEXT_VV(vdiv_vv_b, 1) +GEN_VEXT_VV(vdiv_vv_h, 2) +GEN_VEXT_VV(vdiv_vv_w, 4) +GEN_VEXT_VV(vdiv_vv_d, 8) +GEN_VEXT_VV(vremu_vv_b, 1) +GEN_VEXT_VV(vremu_vv_h, 2) +GEN_VEXT_VV(vremu_vv_w, 4) +GEN_VEXT_VV(vremu_vv_d, 8) +GEN_VEXT_VV(vrem_vv_b, 1) +GEN_VEXT_VV(vrem_vv_h, 2) +GEN_VEXT_VV(vrem_vv_w, 4) +GEN_VEXT_VV(vrem_vv_d, 8) =20 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) @@ -1639,15 +1696,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4,= DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) -GEN_VEXT_VV(vwmul_vv_b) -GEN_VEXT_VV(vwmul_vv_h) -GEN_VEXT_VV(vwmul_vv_w) -GEN_VEXT_VV(vwmulu_vv_b) -GEN_VEXT_VV(vwmulu_vv_h) -GEN_VEXT_VV(vwmulu_vv_w) -GEN_VEXT_VV(vwmulsu_vv_b) -GEN_VEXT_VV(vwmulsu_vv_h) -GEN_VEXT_VV(vwmulsu_vv_w) +GEN_VEXT_VV(vwmul_vv_b, 2) +GEN_VEXT_VV(vwmul_vv_h, 4) +GEN_VEXT_VV(vwmul_vv_w, 8) +GEN_VEXT_VV(vwmulu_vv_b, 2) +GEN_VEXT_VV(vwmulu_vv_h, 4) +GEN_VEXT_VV(vwmulu_vv_w, 8) +GEN_VEXT_VV(vwmulsu_vv_b, 2) +GEN_VEXT_VV(vwmulsu_vv_h, 4) +GEN_VEXT_VV(vwmulsu_vv_w, 8) =20 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) @@ -1698,22 +1755,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, = DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) -GEN_VEXT_VV(vmacc_vv_b) -GEN_VEXT_VV(vmacc_vv_h) -GEN_VEXT_VV(vmacc_vv_w) -GEN_VEXT_VV(vmacc_vv_d) -GEN_VEXT_VV(vnmsac_vv_b) -GEN_VEXT_VV(vnmsac_vv_h) -GEN_VEXT_VV(vnmsac_vv_w) -GEN_VEXT_VV(vnmsac_vv_d) -GEN_VEXT_VV(vmadd_vv_b) -GEN_VEXT_VV(vmadd_vv_h) -GEN_VEXT_VV(vmadd_vv_w) -GEN_VEXT_VV(vmadd_vv_d) -GEN_VEXT_VV(vnmsub_vv_b) -GEN_VEXT_VV(vnmsub_vv_h) -GEN_VEXT_VV(vnmsub_vv_w) -GEN_VEXT_VV(vnmsub_vv_d) +GEN_VEXT_VV(vmacc_vv_b, 1) +GEN_VEXT_VV(vmacc_vv_h, 2) +GEN_VEXT_VV(vmacc_vv_w, 4) +GEN_VEXT_VV(vmacc_vv_d, 8) +GEN_VEXT_VV(vnmsac_vv_b, 1) +GEN_VEXT_VV(vnmsac_vv_h, 2) +GEN_VEXT_VV(vnmsac_vv_w, 4) +GEN_VEXT_VV(vnmsac_vv_d, 8) +GEN_VEXT_VV(vmadd_vv_b, 1) +GEN_VEXT_VV(vmadd_vv_h, 2) +GEN_VEXT_VV(vmadd_vv_w, 4) +GEN_VEXT_VV(vmadd_vv_d, 8) +GEN_VEXT_VV(vnmsub_vv_b, 1) +GEN_VEXT_VV(vnmsub_vv_h, 2) +GEN_VEXT_VV(vnmsub_vv_w, 4) +GEN_VEXT_VV(vnmsub_vv_d, 8) =20 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ @@ -1766,15 +1823,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4,= DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) -GEN_VEXT_VV(vwmaccu_vv_b) -GEN_VEXT_VV(vwmaccu_vv_h) -GEN_VEXT_VV(vwmaccu_vv_w) -GEN_VEXT_VV(vwmacc_vv_b) -GEN_VEXT_VV(vwmacc_vv_h) -GEN_VEXT_VV(vwmacc_vv_w) -GEN_VEXT_VV(vwmaccsu_vv_b) -GEN_VEXT_VV(vwmaccsu_vv_h) -GEN_VEXT_VV(vwmaccsu_vv_w) +GEN_VEXT_VV(vwmaccu_vv_b, 2) +GEN_VEXT_VV(vwmaccu_vv_h, 4) +GEN_VEXT_VV(vwmaccu_vv_w, 8) +GEN_VEXT_VV(vwmacc_vv_b, 2) +GEN_VEXT_VV(vwmacc_vv_h, 4) +GEN_VEXT_VV(vwmacc_vv_w, 8) +GEN_VEXT_VV(vwmaccsu_vv_b, 2) +GEN_VEXT_VV(vwmaccsu_vv_h, 4) +GEN_VEXT_VV(vwmaccsu_vv_w, 8) =20 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 164800509718343.02333530802309; Tue, 22 Mar 2022 20:11:37 -0700 (PDT) Received: from localhost ([::1]:53454 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrPI-0005mh-CE for importer@patchew.org; Tue, 22 Mar 2022 23:11:34 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48698) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMo-0002Yz-0K; Tue, 22 Mar 2022 23:08:58 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36410) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMk-00038Y-Ua; Tue, 22 Mar 2022 23:08:57 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 0CF1711EFB8; Wed, 23 Mar 2022 03:08:52 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Sun, 06 Mar 2022 23:10:25 -0800 Subject: [PATCH qemu v2 04/13] target/riscv: rvv: Add tail agnostic for vector load / store instructions Message-ID: <164800493107.31817.14053311036718644936-4@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005097529100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 9 +++++++ target/riscv/vector_helper.c | 32 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 603abe0e9f..e503c31111 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -711,6 +711,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, ui= nt8_t eew) data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, emul); data =3D FIELD_DP32(data, VDATA, NF, a->nf); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); } =20 @@ -748,6 +749,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, ui= nt8_t eew) data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, emul); data =3D FIELD_DP32(data, VDATA, NF, a->nf); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, true); } =20 @@ -774,6 +776,7 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a= , uint8_t eew) /* EMUL =3D 1, NFIELDS =3D 1 */ data =3D FIELD_DP32(data, VDATA, LMUL, 0); data =3D FIELD_DP32(data, VDATA, NF, 1); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); } =20 @@ -791,6 +794,7 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a= , uint8_t eew) /* EMUL =3D 1, NFIELDS =3D 1 */ data =3D FIELD_DP32(data, VDATA, LMUL, 0); data =3D FIELD_DP32(data, VDATA, NF, 1); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, true); } =20 @@ -862,6 +866,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a,= uint8_t eew) data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, emul); data =3D FIELD_DP32(data, VDATA, NF, a->nf); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); } =20 @@ -891,6 +896,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a,= uint8_t eew) data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, emul); data =3D FIELD_DP32(data, VDATA, NF, a->nf); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); fn =3D fns[eew]; if (fn =3D=3D NULL) { return false; @@ -991,6 +997,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, = uint8_t eew) data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, emul); data =3D FIELD_DP32(data, VDATA, NF, a->nf); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); } =20 @@ -1043,6 +1050,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a= , uint8_t eew) data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, emul); data =3D FIELD_DP32(data, VDATA, NF, a->nf); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true); } =20 @@ -1108,6 +1116,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, u= int8_t eew) data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, emul); data =3D FIELD_DP32(data, VDATA, NF, a->nf); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); return ldff_trans(a->rd, a->rs1, data, fn, s); } =20 diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 6c47d39251..082b12ad1d 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -289,6 +289,9 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, uint32_t i, k; uint32_t nf =3D vext_nf(desc); uint32_t max_elems =3D vext_max_elems(desc, log2_esz); + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); + uint32_t esz =3D 1 << log2_esz; =20 for (i =3D env->vstart; i < env->vl; i++, env->vstart++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -303,6 +306,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, } } env->vstart =3D 0; + /* set tail elements to 1s */ + for (k =3D 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * vlmax, + env->vl * esz, vlmax * esz); + } } =20 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ @@ -348,6 +356,9 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState= *env, uint32_t desc, uint32_t i, k; uint32_t nf =3D vext_nf(desc); uint32_t max_elems =3D vext_max_elems(desc, log2_esz); + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); + uint32_t esz =3D 1 << log2_esz; =20 /* load bytes from guest memory */ for (i =3D env->vstart; i < evl; i++, env->vstart++) { @@ -359,6 +370,11 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVStat= e *env, uint32_t desc, } } env->vstart =3D 0; + /* set tail elements to 1s */ + for (k =3D 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * vlmax, + env->vl * esz, vlmax * esz); + } } =20 /* @@ -458,6 +474,9 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t nf =3D vext_nf(desc); uint32_t vm =3D vext_vm(desc); uint32_t max_elems =3D vext_max_elems(desc, log2_esz); + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); + uint32_t esz =3D 1 << log2_esz; =20 /* load bytes from guest memory */ for (i =3D env->vstart; i < env->vl; i++, env->vstart++) { @@ -473,6 +492,11 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, } } env->vstart =3D 0; + /* set tail elements to 1s */ + for (k =3D 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * vlmax, + env->vl * esz, vlmax * esz); + } } =20 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) = \ @@ -540,6 +564,9 @@ vext_ldff(void *vd, void *v0, target_ulong base, uint32_t nf =3D vext_nf(desc); uint32_t vm =3D vext_vm(desc); uint32_t max_elems =3D vext_max_elems(desc, log2_esz); + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); + uint32_t esz =3D 1 << log2_esz; target_ulong addr, offset, remain; =20 /* probe every access*/ @@ -595,6 +622,11 @@ ProbeSuccess: } } env->vstart =3D 0; + /* set tail elements to 1s */ + for (k =3D 0; k < nf; ++k) { + vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * vlmax, + env->vl * esz, vlmax * esz); + } } =20 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005745072320.76727163133796; Tue, 22 Mar 2022 20:22:25 -0700 (PDT) Received: from localhost ([::1]:51310 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrZg-0006Y1-Hm for importer@patchew.org; Tue, 22 Mar 2022 23:22:16 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48746) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMq-0002eg-0m; Tue, 22 Mar 2022 23:09:00 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36412) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMn-000391-Nh; Tue, 22 Mar 2022 23:08:59 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 2BE4811EFBD; Wed, 23 Mar 2022 03:08:52 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Sun, 06 Mar 2022 23:32:57 -0800 Subject: [PATCH qemu v2 05/13] target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions Message-ID: <164800493107.31817.14053311036718644936-5@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005747029100002 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 5 + target/riscv/vector_helper.c | 311 +++++++++++++----------- 2 files changed, 178 insertions(+), 138 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index e503c31111..4e885da187 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1287,6 +1287,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, ui= nt32_t vs2, uint32_t vm, =20 data =3D FIELD_DP32(data, VDATA, VM, vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); desc =3D tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); =20 @@ -1452,6 +1453,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, ui= nt32_t vs2, uint32_t vm, =20 data =3D FIELD_DP32(data, VDATA, VM, vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); desc =3D tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); =20 @@ -1534,6 +1536,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr = *a, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -1615,6 +1618,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr = *a, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -1693,6 +1697,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 082b12ad1d..df1c79d8d3 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -861,10 +861,12 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) =20 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_fn fn) + opivx2_fn fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); uint32_t i; =20 for (i =3D env->vstart; i < vl; i++) { @@ -874,30 +876,32 @@ static void do_vext_vx(void *vd, void *v0, target_lon= g s1, void *vs2, fn(vd, s1, vs2, i); } env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME) \ +#define GEN_VEXT_VX(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vx(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ -} - -GEN_VEXT_VX(vadd_vx_b) -GEN_VEXT_VX(vadd_vx_h) -GEN_VEXT_VX(vadd_vx_w) -GEN_VEXT_VX(vadd_vx_d) -GEN_VEXT_VX(vsub_vx_b) -GEN_VEXT_VX(vsub_vx_h) -GEN_VEXT_VX(vsub_vx_w) -GEN_VEXT_VX(vsub_vx_d) -GEN_VEXT_VX(vrsub_vx_b) -GEN_VEXT_VX(vrsub_vx_h) -GEN_VEXT_VX(vrsub_vx_w) -GEN_VEXT_VX(vrsub_vx_d) + do_##NAME, ESZ); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1) +GEN_VEXT_VX(vadd_vx_h, 2) +GEN_VEXT_VX(vadd_vx_w, 4) +GEN_VEXT_VX(vadd_vx_d, 8) +GEN_VEXT_VX(vsub_vx_b, 1) +GEN_VEXT_VX(vsub_vx_h, 2) +GEN_VEXT_VX(vsub_vx_w, 4) +GEN_VEXT_VX(vsub_vx_d, 8) +GEN_VEXT_VX(vrsub_vx_b, 1) +GEN_VEXT_VX(vrsub_vx_h, 2) +GEN_VEXT_VX(vrsub_vx_w, 4) +GEN_VEXT_VX(vrsub_vx_d, 8) =20 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) { @@ -1025,30 +1029,30 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_= ADD) RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) -GEN_VEXT_VX(vwaddu_vx_b) -GEN_VEXT_VX(vwaddu_vx_h) -GEN_VEXT_VX(vwaddu_vx_w) -GEN_VEXT_VX(vwsubu_vx_b) -GEN_VEXT_VX(vwsubu_vx_h) -GEN_VEXT_VX(vwsubu_vx_w) -GEN_VEXT_VX(vwadd_vx_b) -GEN_VEXT_VX(vwadd_vx_h) -GEN_VEXT_VX(vwadd_vx_w) -GEN_VEXT_VX(vwsub_vx_b) -GEN_VEXT_VX(vwsub_vx_h) -GEN_VEXT_VX(vwsub_vx_w) -GEN_VEXT_VX(vwaddu_wx_b) -GEN_VEXT_VX(vwaddu_wx_h) -GEN_VEXT_VX(vwaddu_wx_w) -GEN_VEXT_VX(vwsubu_wx_b) -GEN_VEXT_VX(vwsubu_wx_h) -GEN_VEXT_VX(vwsubu_wx_w) -GEN_VEXT_VX(vwadd_wx_b) -GEN_VEXT_VX(vwadd_wx_h) -GEN_VEXT_VX(vwadd_wx_w) -GEN_VEXT_VX(vwsub_wx_b) -GEN_VEXT_VX(vwsub_wx_h) -GEN_VEXT_VX(vwsub_wx_w) +GEN_VEXT_VX(vwaddu_vx_b, 2) +GEN_VEXT_VX(vwaddu_vx_h, 4) +GEN_VEXT_VX(vwaddu_vx_w, 8) +GEN_VEXT_VX(vwsubu_vx_b, 2) +GEN_VEXT_VX(vwsubu_vx_h, 4) +GEN_VEXT_VX(vwsubu_vx_w, 8) +GEN_VEXT_VX(vwadd_vx_b, 2) +GEN_VEXT_VX(vwadd_vx_h, 4) +GEN_VEXT_VX(vwadd_vx_w, 8) +GEN_VEXT_VX(vwsub_vx_b, 2) +GEN_VEXT_VX(vwsub_vx_h, 4) +GEN_VEXT_VX(vwsub_vx_w, 8) +GEN_VEXT_VX(vwaddu_wx_b, 2) +GEN_VEXT_VX(vwaddu_wx_h, 4) +GEN_VEXT_VX(vwaddu_wx_w, 8) +GEN_VEXT_VX(vwsubu_wx_b, 2) +GEN_VEXT_VX(vwsubu_wx_h, 4) +GEN_VEXT_VX(vwsubu_wx_w, 8) +GEN_VEXT_VX(vwadd_wx_b, 2) +GEN_VEXT_VX(vwadd_wx_h, 4) +GEN_VEXT_VX(vwadd_wx_w, 8) +GEN_VEXT_VX(vwsub_wx_b, 2) +GEN_VEXT_VX(vwsub_wx_h, 4) +GEN_VEXT_VX(vwsub_wx_w, 8) =20 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ #define DO_VADC(N, M, C) (N + M + C) @@ -1059,6 +1063,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1069,6 +1077,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ *((ETYPE *)vd + H(i)) =3D DO_OP(s2, s1, carry); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) @@ -1086,6 +1097,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D = \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -1095,6 +1110,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ *((ETYPE *)vd + H(i)) =3D DO_OP(s2, (ETYPE)(target_long)s1, carry)= ;\ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz);\ } =20 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) @@ -1117,6 +1134,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ { \ uint32_t vl =3D env->vl; \ uint32_t vm =3D vext_vm(desc); \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1126,6 +1146,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) @@ -1144,6 +1170,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ { \ uint32_t vl =3D env->vl; \ uint32_t vm =3D vext_vm(desc); \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1153,6 +1182,12 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, \ DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) @@ -1203,18 +1238,18 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) -GEN_VEXT_VX(vand_vx_b) -GEN_VEXT_VX(vand_vx_h) -GEN_VEXT_VX(vand_vx_w) -GEN_VEXT_VX(vand_vx_d) -GEN_VEXT_VX(vor_vx_b) -GEN_VEXT_VX(vor_vx_h) -GEN_VEXT_VX(vor_vx_w) -GEN_VEXT_VX(vor_vx_d) -GEN_VEXT_VX(vxor_vx_b) -GEN_VEXT_VX(vxor_vx_h) -GEN_VEXT_VX(vxor_vx_w) -GEN_VEXT_VX(vxor_vx_d) +GEN_VEXT_VX(vand_vx_b, 1) +GEN_VEXT_VX(vand_vx_h, 2) +GEN_VEXT_VX(vand_vx_w, 4) +GEN_VEXT_VX(vand_vx_d, 8) +GEN_VEXT_VX(vor_vx_b, 1) +GEN_VEXT_VX(vor_vx_h, 2) +GEN_VEXT_VX(vor_vx_w, 4) +GEN_VEXT_VX(vor_vx_d, 8) +GEN_VEXT_VX(vxor_vx_b, 1) +GEN_VEXT_VX(vxor_vx_h, 2) +GEN_VEXT_VX(vxor_vx_w, 4) +GEN_VEXT_VX(vxor_vx_d, 8) =20 /* Vector Single-Width Bit Shift Instructions */ #define DO_SLL(N, M) (N << (M)) @@ -1468,22 +1503,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) -GEN_VEXT_VX(vminu_vx_b) -GEN_VEXT_VX(vminu_vx_h) -GEN_VEXT_VX(vminu_vx_w) -GEN_VEXT_VX(vminu_vx_d) -GEN_VEXT_VX(vmin_vx_b) -GEN_VEXT_VX(vmin_vx_h) -GEN_VEXT_VX(vmin_vx_w) -GEN_VEXT_VX(vmin_vx_d) -GEN_VEXT_VX(vmaxu_vx_b) -GEN_VEXT_VX(vmaxu_vx_h) -GEN_VEXT_VX(vmaxu_vx_w) -GEN_VEXT_VX(vmaxu_vx_d) -GEN_VEXT_VX(vmax_vx_b) -GEN_VEXT_VX(vmax_vx_h) -GEN_VEXT_VX(vmax_vx_w) -GEN_VEXT_VX(vmax_vx_d) +GEN_VEXT_VX(vminu_vx_b, 1) +GEN_VEXT_VX(vminu_vx_h, 2) +GEN_VEXT_VX(vminu_vx_w, 4) +GEN_VEXT_VX(vminu_vx_d, 8) +GEN_VEXT_VX(vmin_vx_b, 1) +GEN_VEXT_VX(vmin_vx_h, 2) +GEN_VEXT_VX(vmin_vx_w, 4) +GEN_VEXT_VX(vmin_vx_d, 8) +GEN_VEXT_VX(vmaxu_vx_b, 1) +GEN_VEXT_VX(vmaxu_vx_h, 2) +GEN_VEXT_VX(vmaxu_vx_w, 4) +GEN_VEXT_VX(vmaxu_vx_d, 8) +GEN_VEXT_VX(vmax_vx_b, 1) +GEN_VEXT_VX(vmax_vx_h, 2) +GEN_VEXT_VX(vmax_vx_w, 4) +GEN_VEXT_VX(vmax_vx_d, 8) =20 /* Vector Single-Width Integer Multiply Instructions */ #define DO_MUL(N, M) (N * M) @@ -1627,22 +1662,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_= mulhsu_b) RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) -GEN_VEXT_VX(vmul_vx_b) -GEN_VEXT_VX(vmul_vx_h) -GEN_VEXT_VX(vmul_vx_w) -GEN_VEXT_VX(vmul_vx_d) -GEN_VEXT_VX(vmulh_vx_b) -GEN_VEXT_VX(vmulh_vx_h) -GEN_VEXT_VX(vmulh_vx_w) -GEN_VEXT_VX(vmulh_vx_d) -GEN_VEXT_VX(vmulhu_vx_b) -GEN_VEXT_VX(vmulhu_vx_h) -GEN_VEXT_VX(vmulhu_vx_w) -GEN_VEXT_VX(vmulhu_vx_d) -GEN_VEXT_VX(vmulhsu_vx_b) -GEN_VEXT_VX(vmulhsu_vx_h) -GEN_VEXT_VX(vmulhsu_vx_w) -GEN_VEXT_VX(vmulhsu_vx_d) +GEN_VEXT_VX(vmul_vx_b, 1) +GEN_VEXT_VX(vmul_vx_h, 2) +GEN_VEXT_VX(vmul_vx_w, 4) +GEN_VEXT_VX(vmul_vx_d, 8) +GEN_VEXT_VX(vmulh_vx_b, 1) +GEN_VEXT_VX(vmulh_vx_h, 2) +GEN_VEXT_VX(vmulh_vx_w, 4) +GEN_VEXT_VX(vmulh_vx_d, 8) +GEN_VEXT_VX(vmulhu_vx_b, 1) +GEN_VEXT_VX(vmulhu_vx_h, 2) +GEN_VEXT_VX(vmulhu_vx_w, 4) +GEN_VEXT_VX(vmulhu_vx_d, 8) +GEN_VEXT_VX(vmulhsu_vx_b, 1) +GEN_VEXT_VX(vmulhsu_vx_h, 2) +GEN_VEXT_VX(vmulhsu_vx_w, 4) +GEN_VEXT_VX(vmulhsu_vx_d, 8) =20 /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M =3D=3D 0) ? (__typeof(N))(-1) : N / M) @@ -1701,22 +1736,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) -GEN_VEXT_VX(vdivu_vx_b) -GEN_VEXT_VX(vdivu_vx_h) -GEN_VEXT_VX(vdivu_vx_w) -GEN_VEXT_VX(vdivu_vx_d) -GEN_VEXT_VX(vdiv_vx_b) -GEN_VEXT_VX(vdiv_vx_h) -GEN_VEXT_VX(vdiv_vx_w) -GEN_VEXT_VX(vdiv_vx_d) -GEN_VEXT_VX(vremu_vx_b) -GEN_VEXT_VX(vremu_vx_h) -GEN_VEXT_VX(vremu_vx_w) -GEN_VEXT_VX(vremu_vx_d) -GEN_VEXT_VX(vrem_vx_b) -GEN_VEXT_VX(vrem_vx_h) -GEN_VEXT_VX(vrem_vx_w) -GEN_VEXT_VX(vrem_vx_d) +GEN_VEXT_VX(vdivu_vx_b, 1) +GEN_VEXT_VX(vdivu_vx_h, 2) +GEN_VEXT_VX(vdivu_vx_w, 4) +GEN_VEXT_VX(vdivu_vx_d, 8) +GEN_VEXT_VX(vdiv_vx_b, 1) +GEN_VEXT_VX(vdiv_vx_h, 2) +GEN_VEXT_VX(vdiv_vx_w, 4) +GEN_VEXT_VX(vdiv_vx_d, 8) +GEN_VEXT_VX(vremu_vx_b, 1) +GEN_VEXT_VX(vremu_vx_h, 2) +GEN_VEXT_VX(vremu_vx_w, 4) +GEN_VEXT_VX(vremu_vx_d, 8) +GEN_VEXT_VX(vrem_vx_b, 1) +GEN_VEXT_VX(vrem_vx_h, 2) +GEN_VEXT_VX(vrem_vx_w, 4) +GEN_VEXT_VX(vrem_vx_d, 8) =20 /* Vector Widening Integer Multiply Instructions */ RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) @@ -1747,15 +1782,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_= MUL) RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) -GEN_VEXT_VX(vwmul_vx_b) -GEN_VEXT_VX(vwmul_vx_h) -GEN_VEXT_VX(vwmul_vx_w) -GEN_VEXT_VX(vwmulu_vx_b) -GEN_VEXT_VX(vwmulu_vx_h) -GEN_VEXT_VX(vwmulu_vx_w) -GEN_VEXT_VX(vwmulsu_vx_b) -GEN_VEXT_VX(vwmulsu_vx_h) -GEN_VEXT_VX(vwmulsu_vx_w) +GEN_VEXT_VX(vwmul_vx_b, 2) +GEN_VEXT_VX(vwmul_vx_h, 4) +GEN_VEXT_VX(vwmul_vx_w, 8) +GEN_VEXT_VX(vwmulu_vx_b, 2) +GEN_VEXT_VX(vwmulu_vx_h, 4) +GEN_VEXT_VX(vwmulu_vx_w, 8) +GEN_VEXT_VX(vwmulsu_vx_b, 2) +GEN_VEXT_VX(vwmulsu_vx_h, 4) +GEN_VEXT_VX(vwmulsu_vx_w, 8) =20 /* Vector Single-Width Integer Multiply-Add Instructions */ #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -1828,22 +1863,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_N= MSUB) RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) -GEN_VEXT_VX(vmacc_vx_b) -GEN_VEXT_VX(vmacc_vx_h) -GEN_VEXT_VX(vmacc_vx_w) -GEN_VEXT_VX(vmacc_vx_d) -GEN_VEXT_VX(vnmsac_vx_b) -GEN_VEXT_VX(vnmsac_vx_h) -GEN_VEXT_VX(vnmsac_vx_w) -GEN_VEXT_VX(vnmsac_vx_d) -GEN_VEXT_VX(vmadd_vx_b) -GEN_VEXT_VX(vmadd_vx_h) -GEN_VEXT_VX(vmadd_vx_w) -GEN_VEXT_VX(vmadd_vx_d) -GEN_VEXT_VX(vnmsub_vx_b) -GEN_VEXT_VX(vnmsub_vx_h) -GEN_VEXT_VX(vnmsub_vx_w) -GEN_VEXT_VX(vnmsub_vx_d) +GEN_VEXT_VX(vmacc_vx_b, 1) +GEN_VEXT_VX(vmacc_vx_h, 2) +GEN_VEXT_VX(vmacc_vx_w, 4) +GEN_VEXT_VX(vmacc_vx_d, 8) +GEN_VEXT_VX(vnmsac_vx_b, 1) +GEN_VEXT_VX(vnmsac_vx_h, 2) +GEN_VEXT_VX(vnmsac_vx_w, 4) +GEN_VEXT_VX(vnmsac_vx_d, 8) +GEN_VEXT_VX(vmadd_vx_b, 1) +GEN_VEXT_VX(vmadd_vx_h, 2) +GEN_VEXT_VX(vmadd_vx_w, 4) +GEN_VEXT_VX(vmadd_vx_d, 8) +GEN_VEXT_VX(vnmsub_vx_b, 1) +GEN_VEXT_VX(vnmsub_vx_h, 2) +GEN_VEXT_VX(vnmsub_vx_w, 4) +GEN_VEXT_VX(vnmsub_vx_d, 8) =20 /* Vector Widening Integer Multiply-Add Instructions */ RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) @@ -1877,18 +1912,18 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, D= O_MACC) RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) -GEN_VEXT_VX(vwmaccu_vx_b) -GEN_VEXT_VX(vwmaccu_vx_h) -GEN_VEXT_VX(vwmaccu_vx_w) -GEN_VEXT_VX(vwmacc_vx_b) -GEN_VEXT_VX(vwmacc_vx_h) -GEN_VEXT_VX(vwmacc_vx_w) -GEN_VEXT_VX(vwmaccsu_vx_b) -GEN_VEXT_VX(vwmaccsu_vx_h) -GEN_VEXT_VX(vwmaccsu_vx_w) -GEN_VEXT_VX(vwmaccus_vx_b) -GEN_VEXT_VX(vwmaccus_vx_h) -GEN_VEXT_VX(vwmaccus_vx_w) +GEN_VEXT_VX(vwmaccu_vx_b, 2) +GEN_VEXT_VX(vwmaccu_vx_h, 4) +GEN_VEXT_VX(vwmaccu_vx_w, 8) +GEN_VEXT_VX(vwmacc_vx_b, 2) +GEN_VEXT_VX(vwmacc_vx_h, 4) +GEN_VEXT_VX(vwmacc_vx_w, 8) +GEN_VEXT_VX(vwmaccsu_vx_b, 2) +GEN_VEXT_VX(vwmaccsu_vx_h, 4) +GEN_VEXT_VX(vwmaccsu_vx_w, 8) +GEN_VEXT_VX(vwmaccus_vx_b, 2) +GEN_VEXT_VX(vwmaccus_vx_h, 4) +GEN_VEXT_VX(vwmaccus_vx_w, 8) =20 /* Vector Integer Merge and Move Instructions */ #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005542433384.2382216473135; Tue, 22 Mar 2022 20:19:02 -0700 (PDT) Received: from localhost ([::1]:44426 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrWX-0001tL-6w for importer@patchew.org; Tue, 22 Mar 2022 23:19:01 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48708) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMo-0002a0-JB; Tue, 22 Mar 2022 23:08:58 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36414) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMn-000393-3L; Tue, 22 Mar 2022 23:08:58 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 4A1B711F033; Wed, 23 Mar 2022 03:08:52 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 01:38:18 -0800 Subject: [PATCH qemu v2 06/13] target/riscv: rvv: Add tail agnostic for vector integer shift instructions Message-ID: <164800493107.31817.14053311036718644936-6@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005544224100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + target/riscv/vector_helper.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 4e885da187..e014bdac95 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1880,6 +1880,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index df1c79d8d3..860e51154b 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1262,6 +1262,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(TS1); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -1273,6 +1276,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ *((TS1 *)vd + HS1(i)) =3D OP(s2, s1 & MASK); = \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) @@ -1297,6 +1302,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(TD); \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1307,6 +1316,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ *((TD *)vd + HD(i)) =3D OP(s2, s1 & MASK); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005455732339.9312025108202; Tue, 22 Mar 2022 20:17:35 -0700 (PDT) Received: from localhost ([::1]:41204 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrV8-0008Af-Ot for importer@patchew.org; Tue, 22 Mar 2022 23:17:34 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48740) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMp-0002cS-Em; Tue, 22 Mar 2022 23:08:59 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36416) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMn-00039G-R9; Tue, 22 Mar 2022 23:08:59 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 711F511F0B5; Wed, 23 Mar 2022 03:08:52 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 01:43:53 -0800 Subject: [PATCH qemu v2 07/13] target/riscv: rvv: Add tail agnostic for vector integer comparison instructions Message-ID: <164800493107.31817.14053311036718644936-7@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005457791100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 860e51154b..e3393f7d99 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1363,6 +1363,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1374,6 +1377,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) @@ -1412,6 +1421,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1423,6 +1435,12 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, void *vs2, \ DO_OP(s2, (ETYPE)(target_long)s1)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005325672506.0685362741689; Tue, 22 Mar 2022 20:15:25 -0700 (PDT) Received: from localhost ([::1]:34996 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrT2-0003wd-Lj for importer@patchew.org; Tue, 22 Mar 2022 23:15:24 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48744) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMp-0002cg-HG; Tue, 22 Mar 2022 23:08:59 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36418) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMn-00039H-QX; Tue, 22 Mar 2022 23:08:59 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 8D64011F0F0; Wed, 23 Mar 2022 03:08:52 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 01:53:23 -0800 Subject: [PATCH qemu v2 08/13] target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions Message-ID: <164800493107.31817.14053311036718644936-8@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005326926100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 4 ++++ target/riscv/vector_helper.c | 28 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index e014bdac95..88912d9864 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2082,6 +2082,7 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_= v *a) MAXSZ(s), MAXSZ(s)); } else { uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] =3D { gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, @@ -2124,6 +2125,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_= x *a) TCGv_i64 s1_i64 =3D tcg_temp_new_i64(); TCGv_ptr dest =3D tcg_temp_new_ptr(); uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] =3D { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -2162,6 +2164,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_= i *a) TCGv_i64 s1; TCGv_ptr dest; uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] =3D { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -2745,6 +2748,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_= v_f *a) TCGv_ptr dest; TCGv_i32 desc; uint32_t data =3D FIELD_DP32(0, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[3] =3D { gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index e3393f7d99..77a5629b73 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -1961,6 +1961,10 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState= *env, \ uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), \ + env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -1968,6 +1972,9 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState = *env, \ *((ETYPE *)vd + H(i)) =3D s1; \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) @@ -1980,12 +1987,19 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVSt= ate *env, \ uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), \ + env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ *((ETYPE *)vd + H(i)) =3D (ETYPE)s1; \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) @@ -1998,6 +2012,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), \ + env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -2005,6 +2023,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ *((ETYPE *)vd + H(i)) =3D *(vt + H(i)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) @@ -2017,6 +2038,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s= 1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), \ + env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -2026,6 +2051,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , \ *((ETYPE *)vd + H(i)) =3D d; \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005746168892.289873646004; Tue, 22 Mar 2022 20:22:26 -0700 (PDT) Received: from localhost ([::1]:52024 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrZp-000763-34 for importer@patchew.org; Tue, 22 Mar 2022 23:22:25 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48840) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrNC-00041Q-K2; Tue, 22 Mar 2022 23:09:22 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36420) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrN8-00039j-Su; Tue, 22 Mar 2022 23:09:22 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id AA6A311F0F3; Wed, 23 Mar 2022 03:08:52 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 02:04:21 -0800 Subject: [PATCH qemu v2 09/13] target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions Message-ID: <164800493107.31817.14053311036718644936-9@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005747027100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 220 ++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 106 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 77a5629b73..661e78797f 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2103,10 +2103,12 @@ static inline void vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_rm_fn *fn) + opivv2_rm_fn *fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); =20 switch (env->vxrm) { case 0: /* rnu */ @@ -2126,15 +2128,17 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *v= s2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate helpers for fixed point instructions with OPIVV format */ -#define GEN_VEXT_VV_RM(NAME) \ +#define GEN_VEXT_VV_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } =20 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint= 8_t b) @@ -2184,10 +2188,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H= 1, saddu8) RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) -GEN_VEXT_VV_RM(vsaddu_vv_b) -GEN_VEXT_VV_RM(vsaddu_vv_h) -GEN_VEXT_VV_RM(vsaddu_vv_w) -GEN_VEXT_VV_RM(vsaddu_vv_d) +GEN_VEXT_VV_RM(vsaddu_vv_b, 1) +GEN_VEXT_VV_RM(vsaddu_vv_h, 2) +GEN_VEXT_VV_RM(vsaddu_vv_w, 4) +GEN_VEXT_VV_RM(vsaddu_vv_d, 8) =20 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, CPURISCVState *env, int vxrm); @@ -2220,10 +2224,12 @@ static inline void vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_rm_fn *fn) + opivx2_rm_fn *fn, uint32_t esz) { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); =20 switch (env->vxrm) { case 0: /* rnu */ @@ -2243,25 +2249,27 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, vo= id *vs2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); } =20 /* generate helpers for fixed point instructions with OPIVX format */ -#define GEN_VEXT_VX_RM(NAME) \ +#define GEN_VEXT_VX_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } =20 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) -GEN_VEXT_VX_RM(vsaddu_vx_b) -GEN_VEXT_VX_RM(vsaddu_vx_h) -GEN_VEXT_VX_RM(vsaddu_vx_w) -GEN_VEXT_VX_RM(vsaddu_vx_d) +GEN_VEXT_VX_RM(vsaddu_vx_b, 1) +GEN_VEXT_VX_RM(vsaddu_vx_h, 2) +GEN_VEXT_VX_RM(vsaddu_vx_w, 4) +GEN_VEXT_VX_RM(vsaddu_vx_d, 8) =20 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t = b) { @@ -2307,19 +2315,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1= , sadd8) RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) -GEN_VEXT_VV_RM(vsadd_vv_b) -GEN_VEXT_VV_RM(vsadd_vv_h) -GEN_VEXT_VV_RM(vsadd_vv_w) -GEN_VEXT_VV_RM(vsadd_vv_d) +GEN_VEXT_VV_RM(vsadd_vv_b, 1) +GEN_VEXT_VV_RM(vsadd_vv_h, 2) +GEN_VEXT_VV_RM(vsadd_vv_w, 4) +GEN_VEXT_VV_RM(vsadd_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) -GEN_VEXT_VX_RM(vsadd_vx_b) -GEN_VEXT_VX_RM(vsadd_vx_h) -GEN_VEXT_VX_RM(vsadd_vx_w) -GEN_VEXT_VX_RM(vsadd_vx_d) +GEN_VEXT_VX_RM(vsadd_vx_b, 1) +GEN_VEXT_VX_RM(vsadd_vx_h, 2) +GEN_VEXT_VX_RM(vsadd_vx_w, 4) +GEN_VEXT_VX_RM(vsadd_vx_d, 8) =20 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint= 8_t b) { @@ -2368,19 +2376,19 @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H= 1, ssubu8) RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) -GEN_VEXT_VV_RM(vssubu_vv_b) -GEN_VEXT_VV_RM(vssubu_vv_h) -GEN_VEXT_VV_RM(vssubu_vv_w) -GEN_VEXT_VV_RM(vssubu_vv_d) +GEN_VEXT_VV_RM(vssubu_vv_b, 1) +GEN_VEXT_VV_RM(vssubu_vv_h, 2) +GEN_VEXT_VV_RM(vssubu_vv_w, 4) +GEN_VEXT_VV_RM(vssubu_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) -GEN_VEXT_VX_RM(vssubu_vx_b) -GEN_VEXT_VX_RM(vssubu_vx_h) -GEN_VEXT_VX_RM(vssubu_vx_w) -GEN_VEXT_VX_RM(vssubu_vx_d) +GEN_VEXT_VX_RM(vssubu_vx_b, 1) +GEN_VEXT_VX_RM(vssubu_vx_h, 2) +GEN_VEXT_VX_RM(vssubu_vx_w, 4) +GEN_VEXT_VX_RM(vssubu_vx_d, 8) =20 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t = b) { @@ -2426,19 +2434,19 @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1= , ssub8) RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) -GEN_VEXT_VV_RM(vssub_vv_b) -GEN_VEXT_VV_RM(vssub_vv_h) -GEN_VEXT_VV_RM(vssub_vv_w) -GEN_VEXT_VV_RM(vssub_vv_d) +GEN_VEXT_VV_RM(vssub_vv_b, 1) +GEN_VEXT_VV_RM(vssub_vv_h, 2) +GEN_VEXT_VV_RM(vssub_vv_w, 4) +GEN_VEXT_VV_RM(vssub_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) -GEN_VEXT_VX_RM(vssub_vx_b) -GEN_VEXT_VX_RM(vssub_vx_h) -GEN_VEXT_VX_RM(vssub_vx_w) -GEN_VEXT_VX_RM(vssub_vx_d) +GEN_VEXT_VX_RM(vssub_vx_b, 1) +GEN_VEXT_VX_RM(vssub_vx_h, 2) +GEN_VEXT_VX_RM(vssub_vx_w, 4) +GEN_VEXT_VX_RM(vssub_vx_d, 8) =20 /* Vector Single-Width Averaging Add and Subtract */ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) @@ -2490,19 +2498,19 @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1= , aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) -GEN_VEXT_VV_RM(vaadd_vv_b) -GEN_VEXT_VV_RM(vaadd_vv_h) -GEN_VEXT_VV_RM(vaadd_vv_w) -GEN_VEXT_VV_RM(vaadd_vv_d) +GEN_VEXT_VV_RM(vaadd_vv_b, 1) +GEN_VEXT_VV_RM(vaadd_vv_h, 2) +GEN_VEXT_VV_RM(vaadd_vv_w, 4) +GEN_VEXT_VV_RM(vaadd_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) -GEN_VEXT_VX_RM(vaadd_vx_b) -GEN_VEXT_VX_RM(vaadd_vx_h) -GEN_VEXT_VX_RM(vaadd_vx_w) -GEN_VEXT_VX_RM(vaadd_vx_d) +GEN_VEXT_VX_RM(vaadd_vx_b, 1) +GEN_VEXT_VX_RM(vaadd_vx_h, 2) +GEN_VEXT_VX_RM(vaadd_vx_w, 4) +GEN_VEXT_VX_RM(vaadd_vx_d, 8) =20 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2527,19 +2535,19 @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H= 1, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) -GEN_VEXT_VV_RM(vaaddu_vv_b) -GEN_VEXT_VV_RM(vaaddu_vv_h) -GEN_VEXT_VV_RM(vaaddu_vv_w) -GEN_VEXT_VV_RM(vaaddu_vv_d) +GEN_VEXT_VV_RM(vaaddu_vv_b, 1) +GEN_VEXT_VV_RM(vaaddu_vv_h, 2) +GEN_VEXT_VV_RM(vaaddu_vv_w, 4) +GEN_VEXT_VV_RM(vaaddu_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) -GEN_VEXT_VX_RM(vaaddu_vx_b) -GEN_VEXT_VX_RM(vaaddu_vx_h) -GEN_VEXT_VX_RM(vaaddu_vx_w) -GEN_VEXT_VX_RM(vaaddu_vx_d) +GEN_VEXT_VX_RM(vaaddu_vx_b, 1) +GEN_VEXT_VX_RM(vaaddu_vx_h, 2) +GEN_VEXT_VX_RM(vaaddu_vx_w, 4) +GEN_VEXT_VX_RM(vaaddu_vx_d, 8) =20 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int3= 2_t b) { @@ -2563,19 +2571,19 @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1= , asub32) RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) -GEN_VEXT_VV_RM(vasub_vv_b) -GEN_VEXT_VV_RM(vasub_vv_h) -GEN_VEXT_VV_RM(vasub_vv_w) -GEN_VEXT_VV_RM(vasub_vv_d) +GEN_VEXT_VV_RM(vasub_vv_b, 1) +GEN_VEXT_VV_RM(vasub_vv_h, 2) +GEN_VEXT_VV_RM(vasub_vv_w, 4) +GEN_VEXT_VV_RM(vasub_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) -GEN_VEXT_VX_RM(vasub_vx_b) -GEN_VEXT_VX_RM(vasub_vx_h) -GEN_VEXT_VX_RM(vasub_vx_w) -GEN_VEXT_VX_RM(vasub_vx_d) +GEN_VEXT_VX_RM(vasub_vx_b, 1) +GEN_VEXT_VX_RM(vasub_vx_h, 2) +GEN_VEXT_VX_RM(vasub_vx_w, 4) +GEN_VEXT_VX_RM(vasub_vx_d, 8) =20 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -2600,19 +2608,19 @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H= 1, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) -GEN_VEXT_VV_RM(vasubu_vv_b) -GEN_VEXT_VV_RM(vasubu_vv_h) -GEN_VEXT_VV_RM(vasubu_vv_w) -GEN_VEXT_VV_RM(vasubu_vv_d) +GEN_VEXT_VV_RM(vasubu_vv_b, 1) +GEN_VEXT_VV_RM(vasubu_vv_h, 2) +GEN_VEXT_VV_RM(vasubu_vv_w, 4) +GEN_VEXT_VV_RM(vasubu_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) -GEN_VEXT_VX_RM(vasubu_vx_b) -GEN_VEXT_VX_RM(vasubu_vx_h) -GEN_VEXT_VX_RM(vasubu_vx_w) -GEN_VEXT_VX_RM(vasubu_vx_d) +GEN_VEXT_VX_RM(vasubu_vx_b, 1) +GEN_VEXT_VX_RM(vasubu_vx_h, 2) +GEN_VEXT_VX_RM(vasubu_vx_w, 4) +GEN_VEXT_VX_RM(vasubu_vx_d, 8) =20 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t= b) @@ -2707,19 +2715,19 @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1= , vsmul8) RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) -GEN_VEXT_VV_RM(vsmul_vv_b) -GEN_VEXT_VV_RM(vsmul_vv_h) -GEN_VEXT_VV_RM(vsmul_vv_w) -GEN_VEXT_VV_RM(vsmul_vv_d) +GEN_VEXT_VV_RM(vsmul_vv_b, 1) +GEN_VEXT_VV_RM(vsmul_vv_h, 2) +GEN_VEXT_VV_RM(vsmul_vv_w, 4) +GEN_VEXT_VV_RM(vsmul_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) -GEN_VEXT_VX_RM(vsmul_vx_b) -GEN_VEXT_VX_RM(vsmul_vx_h) -GEN_VEXT_VX_RM(vsmul_vx_w) -GEN_VEXT_VX_RM(vsmul_vx_d) +GEN_VEXT_VX_RM(vsmul_vx_b, 1) +GEN_VEXT_VX_RM(vsmul_vx_h, 2) +GEN_VEXT_VX_RM(vsmul_vx_w, 4) +GEN_VEXT_VX_RM(vsmul_vx_d, 8) =20 /* Vector Single-Width Scaling Shift Instructions */ static inline uint8_t @@ -2766,19 +2774,19 @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1= , vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) -GEN_VEXT_VV_RM(vssrl_vv_b) -GEN_VEXT_VV_RM(vssrl_vv_h) -GEN_VEXT_VV_RM(vssrl_vv_w) -GEN_VEXT_VV_RM(vssrl_vv_d) +GEN_VEXT_VV_RM(vssrl_vv_b, 1) +GEN_VEXT_VV_RM(vssrl_vv_h, 2) +GEN_VEXT_VV_RM(vssrl_vv_w, 4) +GEN_VEXT_VV_RM(vssrl_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) -GEN_VEXT_VX_RM(vssrl_vx_b) -GEN_VEXT_VX_RM(vssrl_vx_h) -GEN_VEXT_VX_RM(vssrl_vx_w) -GEN_VEXT_VX_RM(vssrl_vx_d) +GEN_VEXT_VX_RM(vssrl_vx_b, 1) +GEN_VEXT_VX_RM(vssrl_vx_h, 2) +GEN_VEXT_VX_RM(vssrl_vx_w, 4) +GEN_VEXT_VX_RM(vssrl_vx_d, 8) =20 static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -2825,19 +2833,19 @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1= , vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) -GEN_VEXT_VV_RM(vssra_vv_b) -GEN_VEXT_VV_RM(vssra_vv_h) -GEN_VEXT_VV_RM(vssra_vv_w) -GEN_VEXT_VV_RM(vssra_vv_d) +GEN_VEXT_VV_RM(vssra_vv_b, 1) +GEN_VEXT_VV_RM(vssra_vv_h, 2) +GEN_VEXT_VV_RM(vssra_vv_w, 4) +GEN_VEXT_VV_RM(vssra_vv_d, 8) =20 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) -GEN_VEXT_VX_RM(vssra_vx_b) -GEN_VEXT_VX_RM(vssra_vx_h) -GEN_VEXT_VX_RM(vssra_vx_w) -GEN_VEXT_VX_RM(vssra_vx_d) +GEN_VEXT_VX_RM(vssra_vx_b, 1) +GEN_VEXT_VX_RM(vssra_vx_h, 2) +GEN_VEXT_VX_RM(vssra_vx_w, 4) +GEN_VEXT_VX_RM(vssra_vx_d, 8) =20 /* Vector Narrowing Fixed-Point Clip Instructions */ static inline int8_t @@ -2900,16 +2908,16 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, i= nt32_t b) RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) -GEN_VEXT_VV_RM(vnclip_wv_b) -GEN_VEXT_VV_RM(vnclip_wv_h) -GEN_VEXT_VV_RM(vnclip_wv_w) +GEN_VEXT_VV_RM(vnclip_wv_b, 1) +GEN_VEXT_VV_RM(vnclip_wv_h, 2) +GEN_VEXT_VV_RM(vnclip_wv_w, 4) =20 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) -GEN_VEXT_VX_RM(vnclip_wx_b) -GEN_VEXT_VX_RM(vnclip_wx_h) -GEN_VEXT_VX_RM(vnclip_wx_w) +GEN_VEXT_VX_RM(vnclip_wx_b, 1) +GEN_VEXT_VX_RM(vnclip_wx_h, 2) +GEN_VEXT_VX_RM(vnclip_wx_w, 4) =20 static inline uint8_t vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) @@ -2962,16 +2970,16 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a,= uint32_t b) RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) -GEN_VEXT_VV_RM(vnclipu_wv_b) -GEN_VEXT_VV_RM(vnclipu_wv_h) -GEN_VEXT_VV_RM(vnclipu_wv_w) +GEN_VEXT_VV_RM(vnclipu_wv_b, 1) +GEN_VEXT_VV_RM(vnclipu_wv_h, 2) +GEN_VEXT_VV_RM(vnclipu_wv_w, 4) =20 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) -GEN_VEXT_VX_RM(vnclipu_wx_b) -GEN_VEXT_VX_RM(vnclipu_wx_h) -GEN_VEXT_VX_RM(vnclipu_wx_w) +GEN_VEXT_VX_RM(vnclipu_wx_b, 1) +GEN_VEXT_VX_RM(vnclipu_wx_h, 2) +GEN_VEXT_VX_RM(vnclipu_wx_w, 4) =20 /* *** Vector Float Point Arithmetic Instructions --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005831802487.7318174851978; Tue, 22 Mar 2022 20:23:51 -0700 (PDT) Received: from localhost ([::1]:58502 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrbC-0002xZ-LR for importer@patchew.org; Tue, 22 Mar 2022 23:23:50 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48850) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrNE-00048z-8S; Tue, 22 Mar 2022 23:09:24 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36422) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrN9-00039k-04; Tue, 22 Mar 2022 23:09:23 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id D7F7B11F102; Wed, 23 Mar 2022 03:08:52 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 02:05:42 -0800 Subject: [PATCH qemu v2 10/13] target/riscv: rvv: Add tail agnostic for vector floating-point instructions Message-ID: <164800493107.31817.14053311036718644936-10@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005833504100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 11 + target/riscv/vector_helper.c | 448 +++++++++++++----------- 2 files changed, 263 insertions(+), 196 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 88912d9864..644309d23b 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2328,6 +2328,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2410,6 +2411,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ gen_set_rm(s, RISCV_FRM_DYN); \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2448,6 +2450,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2487,6 +2490,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ gen_set_rm(s, RISCV_FRM_DYN); \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2523,6 +2527,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -2562,6 +2567,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a= ) \ gen_set_rm(s, RISCV_FRM_DYN); \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -2645,6 +2651,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -2849,6 +2856,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -2900,6 +2908,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -2967,6 +2976,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -3020,6 +3030,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 661e78797f..697c74e8cc 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2994,13 +2994,16 @@ static void do_##NAME(void *vd, void *vs1, void *vs= 2, int i, \ *((TD *)vd + HD(i)) =3D OP(s2, s1, &env->fp_status); \ } =20 -#define GEN_VEXT_VV_ENV(NAME) \ +#define GEN_VEXT_VV_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype);\ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -3010,14 +3013,18 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ do_##NAME(vd, vs1, vs2, i, env); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + vlmax * ESZ); \ } =20 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) -GEN_VEXT_VV_ENV(vfadd_vv_h) -GEN_VEXT_VV_ENV(vfadd_vv_w) -GEN_VEXT_VV_ENV(vfadd_vv_d) +GEN_VEXT_VV_ENV(vfadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfadd_vv_d, 8) =20 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -3027,13 +3034,16 @@ static void do_##NAME(void *vd, uint64_t s1, void *= vs2, int i, \ *((TD *)vd + HD(i)) =3D OP(s2, (TX1)(T1)s1, &env->fp_status);\ } =20 -#define GEN_VEXT_VF(NAME) \ +#define GEN_VEXT_VF(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype);\ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -3043,27 +3053,31 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, = \ do_##NAME(vd, s1, vs2, i, env); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + vlmax * ESZ); \ } =20 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) -GEN_VEXT_VF(vfadd_vf_h) -GEN_VEXT_VF(vfadd_vf_w) -GEN_VEXT_VF(vfadd_vf_d) +GEN_VEXT_VF(vfadd_vf_h, 2) +GEN_VEXT_VF(vfadd_vf_w, 4) +GEN_VEXT_VF(vfadd_vf_d, 8) =20 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) -GEN_VEXT_VV_ENV(vfsub_vv_h) -GEN_VEXT_VV_ENV(vfsub_vv_w) -GEN_VEXT_VV_ENV(vfsub_vv_d) +GEN_VEXT_VV_ENV(vfsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfsub_vv_d, 8) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) -GEN_VEXT_VF(vfsub_vf_h) -GEN_VEXT_VF(vfsub_vf_w) -GEN_VEXT_VF(vfsub_vf_d) +GEN_VEXT_VF(vfsub_vf_h, 2) +GEN_VEXT_VF(vfsub_vf_w, 4) +GEN_VEXT_VF(vfsub_vf_d, 8) =20 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { @@ -3083,9 +3097,9 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, = float_status *s) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) -GEN_VEXT_VF(vfrsub_vf_h) -GEN_VEXT_VF(vfrsub_vf_w) -GEN_VEXT_VF(vfrsub_vf_d) +GEN_VEXT_VF(vfrsub_vf_h, 2) +GEN_VEXT_VF(vfrsub_vf_w, 4) +GEN_VEXT_VF(vfrsub_vf_d, 8) =20 /* Vector Widening Floating-Point Add/Subtract Instructions */ static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) @@ -3103,12 +3117,12 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, fl= oat_status *s) =20 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) -GEN_VEXT_VV_ENV(vfwadd_vv_h) -GEN_VEXT_VV_ENV(vfwadd_vv_w) +GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) +GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) -GEN_VEXT_VF(vfwadd_vf_h) -GEN_VEXT_VF(vfwadd_vf_w) +GEN_VEXT_VF(vfwadd_vf_h, 4) +GEN_VEXT_VF(vfwadd_vf_w, 8) =20 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { @@ -3125,12 +3139,12 @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, fl= oat_status *s) =20 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) -GEN_VEXT_VV_ENV(vfwsub_vv_h) -GEN_VEXT_VV_ENV(vfwsub_vv_w) +GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) +GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) -GEN_VEXT_VF(vfwsub_vf_h) -GEN_VEXT_VF(vfwsub_vf_w) +GEN_VEXT_VF(vfwsub_vf_h, 4) +GEN_VEXT_VF(vfwsub_vf_w, 8) =20 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { @@ -3144,12 +3158,12 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, f= loat_status *s) =20 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) -GEN_VEXT_VV_ENV(vfwadd_wv_h) -GEN_VEXT_VV_ENV(vfwadd_wv_w) +GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) +GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) -GEN_VEXT_VF(vfwadd_wf_h) -GEN_VEXT_VF(vfwadd_wf_w) +GEN_VEXT_VF(vfwadd_wf_h, 4) +GEN_VEXT_VF(vfwadd_wf_w, 8) =20 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { @@ -3163,39 +3177,39 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, f= loat_status *s) =20 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) -GEN_VEXT_VV_ENV(vfwsub_wv_h) -GEN_VEXT_VV_ENV(vfwsub_wv_w) +GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) +GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) -GEN_VEXT_VF(vfwsub_wf_h) -GEN_VEXT_VF(vfwsub_wf_w) +GEN_VEXT_VF(vfwsub_wf_h, 4) +GEN_VEXT_VF(vfwsub_wf_w, 8) =20 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) -GEN_VEXT_VV_ENV(vfmul_vv_h) -GEN_VEXT_VV_ENV(vfmul_vv_w) -GEN_VEXT_VV_ENV(vfmul_vv_d) +GEN_VEXT_VV_ENV(vfmul_vv_h, 2) +GEN_VEXT_VV_ENV(vfmul_vv_w, 4) +GEN_VEXT_VV_ENV(vfmul_vv_d, 8) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) -GEN_VEXT_VF(vfmul_vf_h) -GEN_VEXT_VF(vfmul_vf_w) -GEN_VEXT_VF(vfmul_vf_d) +GEN_VEXT_VF(vfmul_vf_h, 2) +GEN_VEXT_VF(vfmul_vf_w, 4) +GEN_VEXT_VF(vfmul_vf_d, 8) =20 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) -GEN_VEXT_VV_ENV(vfdiv_vv_h) -GEN_VEXT_VV_ENV(vfdiv_vv_w) -GEN_VEXT_VV_ENV(vfdiv_vv_d) +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) -GEN_VEXT_VF(vfdiv_vf_h) -GEN_VEXT_VF(vfdiv_vf_w) -GEN_VEXT_VF(vfdiv_vf_d) +GEN_VEXT_VF(vfdiv_vf_h, 2) +GEN_VEXT_VF(vfdiv_vf_w, 4) +GEN_VEXT_VF(vfdiv_vf_d, 8) =20 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) { @@ -3215,9 +3229,9 @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, = float_status *s) RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) -GEN_VEXT_VF(vfrdiv_vf_h) -GEN_VEXT_VF(vfrdiv_vf_w) -GEN_VEXT_VF(vfrdiv_vf_d) +GEN_VEXT_VF(vfrdiv_vf_h, 2) +GEN_VEXT_VF(vfrdiv_vf_w, 4) +GEN_VEXT_VF(vfrdiv_vf_d, 8) =20 /* Vector Widening Floating-Point Multiply */ static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) @@ -3234,12 +3248,12 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, fl= oat_status *s) } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) -GEN_VEXT_VV_ENV(vfwmul_vv_h) -GEN_VEXT_VV_ENV(vfwmul_vv_w) +GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) -GEN_VEXT_VF(vfwmul_vf_h) -GEN_VEXT_VF(vfwmul_vf_w) +GEN_VEXT_VF(vfwmul_vf_h, 4) +GEN_VEXT_VF(vfwmul_vf_w, 8) =20 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -3270,9 +3284,9 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint6= 4_t d, float_status *s) RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) -GEN_VEXT_VV_ENV(vfmacc_vv_h) -GEN_VEXT_VV_ENV(vfmacc_vv_w) -GEN_VEXT_VV_ENV(vfmacc_vv_d) +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) =20 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -3286,9 +3300,9 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs= 2, int i, \ RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) -GEN_VEXT_VF(vfmacc_vf_h) -GEN_VEXT_VF(vfmacc_vf_w) -GEN_VEXT_VF(vfmacc_vf_d) +GEN_VEXT_VF(vfmacc_vf_h, 2) +GEN_VEXT_VF(vfmacc_vf_w, 4) +GEN_VEXT_VF(vfmacc_vf_d, 8) =20 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3311,15 +3325,15 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) -GEN_VEXT_VV_ENV(vfnmacc_vv_h) -GEN_VEXT_VV_ENV(vfnmacc_vv_w) -GEN_VEXT_VV_ENV(vfnmacc_vv_d) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) -GEN_VEXT_VF(vfnmacc_vf_h) -GEN_VEXT_VF(vfnmacc_vf_w) -GEN_VEXT_VF(vfnmacc_vf_d) +GEN_VEXT_VF(vfnmacc_vf_h, 2) +GEN_VEXT_VF(vfnmacc_vf_w, 4) +GEN_VEXT_VF(vfnmacc_vf_d, 8) =20 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3339,15 +3353,15 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) -GEN_VEXT_VV_ENV(vfmsac_vv_h) -GEN_VEXT_VV_ENV(vfmsac_vv_w) -GEN_VEXT_VV_ENV(vfmsac_vv_d) +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) -GEN_VEXT_VF(vfmsac_vf_h) -GEN_VEXT_VF(vfmsac_vf_w) -GEN_VEXT_VF(vfmsac_vf_d) +GEN_VEXT_VF(vfmsac_vf_h, 2) +GEN_VEXT_VF(vfmsac_vf_w, 4) +GEN_VEXT_VF(vfmsac_vf_d, 8) =20 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3367,15 +3381,15 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) -GEN_VEXT_VV_ENV(vfnmsac_vv_h) -GEN_VEXT_VV_ENV(vfnmsac_vv_w) -GEN_VEXT_VV_ENV(vfnmsac_vv_d) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) -GEN_VEXT_VF(vfnmsac_vf_h) -GEN_VEXT_VF(vfnmsac_vf_w) -GEN_VEXT_VF(vfnmsac_vf_d) +GEN_VEXT_VF(vfnmsac_vf_h, 2) +GEN_VEXT_VF(vfnmsac_vf_w, 4) +GEN_VEXT_VF(vfnmsac_vf_d, 8) =20 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3395,15 +3409,15 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) -GEN_VEXT_VV_ENV(vfmadd_vv_h) -GEN_VEXT_VV_ENV(vfmadd_vv_w) -GEN_VEXT_VV_ENV(vfmadd_vv_d) +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) -GEN_VEXT_VF(vfmadd_vf_h) -GEN_VEXT_VF(vfmadd_vf_w) -GEN_VEXT_VF(vfmadd_vf_d) +GEN_VEXT_VF(vfmadd_vf_h, 2) +GEN_VEXT_VF(vfmadd_vf_w, 4) +GEN_VEXT_VF(vfmadd_vf_d, 8) =20 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3426,15 +3440,15 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) -GEN_VEXT_VV_ENV(vfnmadd_vv_h) -GEN_VEXT_VV_ENV(vfnmadd_vv_w) -GEN_VEXT_VV_ENV(vfnmadd_vv_d) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) -GEN_VEXT_VF(vfnmadd_vf_h) -GEN_VEXT_VF(vfnmadd_vf_w) -GEN_VEXT_VF(vfnmadd_vf_d) +GEN_VEXT_VF(vfnmadd_vf_h, 2) +GEN_VEXT_VF(vfnmadd_vf_w, 4) +GEN_VEXT_VF(vfnmadd_vf_d, 8) =20 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *= s) { @@ -3454,15 +3468,15 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uin= t64_t d, float_status *s) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) -GEN_VEXT_VV_ENV(vfmsub_vv_h) -GEN_VEXT_VV_ENV(vfmsub_vv_w) -GEN_VEXT_VV_ENV(vfmsub_vv_d) +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) -GEN_VEXT_VF(vfmsub_vf_h) -GEN_VEXT_VF(vfmsub_vf_w) -GEN_VEXT_VF(vfmsub_vf_d) +GEN_VEXT_VF(vfmsub_vf_h, 2) +GEN_VEXT_VF(vfmsub_vf_w, 4) +GEN_VEXT_VF(vfmsub_vf_d, 8) =20 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status = *s) { @@ -3482,15 +3496,15 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, ui= nt64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) -GEN_VEXT_VV_ENV(vfnmsub_vv_h) -GEN_VEXT_VV_ENV(vfnmsub_vv_w) -GEN_VEXT_VV_ENV(vfnmsub_vv_d) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) -GEN_VEXT_VF(vfnmsub_vf_h) -GEN_VEXT_VF(vfnmsub_vf_w) -GEN_VEXT_VF(vfnmsub_vf_d) +GEN_VEXT_VF(vfnmsub_vf_h, 2) +GEN_VEXT_VF(vfnmsub_vf_w, 4) +GEN_VEXT_VF(vfnmsub_vf_d, 8) =20 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status = *s) @@ -3507,12 +3521,12 @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, ui= nt64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) -GEN_VEXT_VV_ENV(vfwmacc_vv_h) -GEN_VEXT_VV_ENV(vfwmacc_vv_w) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) -GEN_VEXT_VF(vfwmacc_vf_h) -GEN_VEXT_VF(vfwmacc_vf_w) +GEN_VEXT_VF(vfwmacc_vf_h, 4) +GEN_VEXT_VF(vfwmacc_vf_w, 8) =20 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status= *s) { @@ -3530,12 +3544,12 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, u= int64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) -GEN_VEXT_VV_ENV(vfwnmacc_vv_h) -GEN_VEXT_VV_ENV(vfwnmacc_vv_w) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) -GEN_VEXT_VF(vfwnmacc_vf_h) -GEN_VEXT_VF(vfwnmacc_vf_w) +GEN_VEXT_VF(vfwnmacc_vf_h, 4) +GEN_VEXT_VF(vfwnmacc_vf_w, 8) =20 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status = *s) { @@ -3553,12 +3567,12 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, ui= nt64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) -GEN_VEXT_VV_ENV(vfwmsac_vv_h) -GEN_VEXT_VV_ENV(vfwmsac_vv_w) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) -GEN_VEXT_VF(vfwmsac_vf_h) -GEN_VEXT_VF(vfwmsac_vf_w) +GEN_VEXT_VF(vfwmsac_vf_h, 4) +GEN_VEXT_VF(vfwmsac_vf_w, 8) =20 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status= *s) { @@ -3576,12 +3590,12 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, u= int64_t d, float_status *s) =20 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) -GEN_VEXT_VV_ENV(vfwnmsac_vv_h) -GEN_VEXT_VV_ENV(vfwnmsac_vv_w) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) -GEN_VEXT_VF(vfwnmsac_vf_h) -GEN_VEXT_VF(vfwnmsac_vf_w) +GEN_VEXT_VF(vfwnmsac_vf_h, 4) +GEN_VEXT_VF(vfwnmsac_vf_w, 8) =20 /* Vector Floating-Point Square-Root Instruction */ /* (TD, T2, TX2) */ @@ -3597,12 +3611,16 @@ static void do_##NAME(void *vd, void *vs2, int i, = \ *((TD *)vd + HD(i)) =3D OP(s2, &env->fp_status); \ } =20 -#define GEN_VEXT_V_ENV(NAME) \ +#define GEN_VEXT_V_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), \ + env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ if (vl =3D=3D 0) { \ @@ -3615,14 +3633,18 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ do_##NAME(vd, vs2, i, env); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + vlmax * ESZ); \ } =20 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) -GEN_VEXT_V_ENV(vfsqrt_v_h) -GEN_VEXT_V_ENV(vfsqrt_v_w) -GEN_VEXT_V_ENV(vfsqrt_v_d) +GEN_VEXT_V_ENV(vfsqrt_v_h, 2) +GEN_VEXT_V_ENV(vfsqrt_v_w, 4) +GEN_VEXT_V_ENV(vfsqrt_v_d, 8) =20 /* * Vector Floating-Point Reciprocal Square-Root Estimate Instruction @@ -3802,9 +3824,9 @@ static float64 frsqrt7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) -GEN_VEXT_V_ENV(vfrsqrt7_v_h) -GEN_VEXT_V_ENV(vfrsqrt7_v_w) -GEN_VEXT_V_ENV(vfrsqrt7_v_d) +GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) +GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) +GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) =20 /* * Vector Floating-Point Reciprocal Estimate Instruction @@ -3993,36 +4015,36 @@ static float64 frec7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) -GEN_VEXT_V_ENV(vfrec7_v_h) -GEN_VEXT_V_ENV(vfrec7_v_w) -GEN_VEXT_V_ENV(vfrec7_v_d) +GEN_VEXT_V_ENV(vfrec7_v_h, 2) +GEN_VEXT_V_ENV(vfrec7_v_w, 4) +GEN_VEXT_V_ENV(vfrec7_v_d, 8) =20 /* Vector Floating-Point MIN/MAX Instructions */ RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) -GEN_VEXT_VV_ENV(vfmin_vv_h) -GEN_VEXT_VV_ENV(vfmin_vv_w) -GEN_VEXT_VV_ENV(vfmin_vv_d) +GEN_VEXT_VV_ENV(vfmin_vv_h, 2) +GEN_VEXT_VV_ENV(vfmin_vv_w, 4) +GEN_VEXT_VV_ENV(vfmin_vv_d, 8) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) -GEN_VEXT_VF(vfmin_vf_h) -GEN_VEXT_VF(vfmin_vf_w) -GEN_VEXT_VF(vfmin_vf_d) +GEN_VEXT_VF(vfmin_vf_h, 2) +GEN_VEXT_VF(vfmin_vf_w, 4) +GEN_VEXT_VF(vfmin_vf_d, 8) =20 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) -GEN_VEXT_VV_ENV(vfmax_vv_h) -GEN_VEXT_VV_ENV(vfmax_vv_w) -GEN_VEXT_VV_ENV(vfmax_vv_d) +GEN_VEXT_VV_ENV(vfmax_vv_h, 2) +GEN_VEXT_VV_ENV(vfmax_vv_w, 4) +GEN_VEXT_VV_ENV(vfmax_vv_d, 8) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) -GEN_VEXT_VF(vfmax_vf_h) -GEN_VEXT_VF(vfmax_vf_w) -GEN_VEXT_VF(vfmax_vf_d) +GEN_VEXT_VF(vfmax_vf_h, 2) +GEN_VEXT_VF(vfmax_vf_w, 4) +GEN_VEXT_VF(vfmax_vf_d, 8) =20 /* Vector Floating-Point Sign-Injection Instructions */ static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) @@ -4043,15 +4065,15 @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, flo= at_status *s) RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) -GEN_VEXT_VV_ENV(vfsgnj_vv_h) -GEN_VEXT_VV_ENV(vfsgnj_vv_w) -GEN_VEXT_VV_ENV(vfsgnj_vv_d) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) -GEN_VEXT_VF(vfsgnj_vf_h) -GEN_VEXT_VF(vfsgnj_vf_w) -GEN_VEXT_VF(vfsgnj_vf_d) +GEN_VEXT_VF(vfsgnj_vf_h, 2) +GEN_VEXT_VF(vfsgnj_vf_w, 4) +GEN_VEXT_VF(vfsgnj_vf_d, 8) =20 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) { @@ -4071,15 +4093,15 @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, fl= oat_status *s) RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) -GEN_VEXT_VV_ENV(vfsgnjn_vv_h) -GEN_VEXT_VV_ENV(vfsgnjn_vv_w) -GEN_VEXT_VV_ENV(vfsgnjn_vv_d) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) -GEN_VEXT_VF(vfsgnjn_vf_h) -GEN_VEXT_VF(vfsgnjn_vf_w) -GEN_VEXT_VF(vfsgnjn_vf_d) +GEN_VEXT_VF(vfsgnjn_vf_h, 2) +GEN_VEXT_VF(vfsgnjn_vf_w, 4) +GEN_VEXT_VF(vfsgnjn_vf_d, 8) =20 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) { @@ -4099,15 +4121,15 @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, fl= oat_status *s) RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) -GEN_VEXT_VV_ENV(vfsgnjx_vv_h) -GEN_VEXT_VV_ENV(vfsgnjx_vv_w) -GEN_VEXT_VV_ENV(vfsgnjx_vv_d) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) -GEN_VEXT_VF(vfsgnjx_vf_h) -GEN_VEXT_VF(vfsgnjx_vf_w) -GEN_VEXT_VF(vfsgnjx_vf_d) +GEN_VEXT_VF(vfsgnjx_vf_h, 2) +GEN_VEXT_VF(vfsgnjx_vf_w, 4) +GEN_VEXT_VF(vfsgnjx_vf_d, 8) =20 /* Vector Floating-Point Compare Instructions */ #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ @@ -4116,6 +4138,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4128,6 +4153,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, voi= d *vs2, \ DO_OP(s2, s1, &env->fp_status)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) @@ -4140,6 +4171,9 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, vo= id *vs2, \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4151,6 +4185,12 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, v= oid *vs2, \ DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) @@ -4251,12 +4291,16 @@ static void do_##NAME(void *vd, void *vs2, int i) = \ *((TD *)vd + HD(i)) =3D OP(s2); \ } =20 -#define GEN_VEXT_V(NAME) \ +#define GEN_VEXT_V(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), \ + env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4266,6 +4310,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ do_##NAME(vd, vs2, i); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(ESZ)](vd, vta, vl, \ + vl * ESZ, \ + vlmax * ESZ); \ } =20 target_ulong fclass_h(uint64_t frs1) @@ -4328,17 +4376,22 @@ target_ulong fclass_d(uint64_t frs1) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) -GEN_VEXT_V(vfclass_v_h) -GEN_VEXT_V(vfclass_v_w) -GEN_VEXT_V(vfclass_v_d) +GEN_VEXT_V(vfclass_v_h, 2) +GEN_VEXT_V(vfclass_v_w, 4) +GEN_VEXT_V(vfclass_v_d, 8) =20 /* Vector Floating-Point Merge Instruction */ + #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -4347,6 +4400,9 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, vo= id *vs2, \ =3D (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) @@ -4358,33 +4414,33 @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) =20 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) -GEN_VEXT_V_ENV(vfcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfcvt_x_f_v_w) -GEN_VEXT_V_ENV(vfcvt_x_f_v_d) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) =20 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) =20 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfcvt_f_x_v_w) -GEN_VEXT_V_ENV(vfcvt_f_x_v_d) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) =20 /* Widening Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4394,30 +4450,30 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d) /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned inte= ger.*/ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) =20 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer= . */ RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) =20 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width fl= oat */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) =20 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) =20 /* * vfwcvt.f.f.v vd, vs2, vm @@ -4430,8 +4486,8 @@ static uint32_t vfwcvtffv16(uint16_t a, float_status = *s) =20 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) =20 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -4442,29 +4498,29 @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) =20 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer= . */ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) -GEN_VEXT_V_ENV(vfncvt_x_f_w_b) -GEN_VEXT_V_ENV(vfncvt_x_f_w_h) -GEN_VEXT_V_ENV(vfncvt_x_f_w_w) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) =20 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to fl= oat */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) =20 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_x_w_h) -GEN_VEXT_V_ENV(vfncvt_f_x_w_w) +GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) =20 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. = */ static uint16_t vfncvtffv16(uint32_t a, float_status *s) @@ -4474,8 +4530,8 @@ static uint16_t vfncvtffv16(uint32_t a, float_status = *s) =20 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_f_w_h) -GEN_VEXT_V_ENV(vfncvt_f_f_w_w) +GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) =20 /* *** Vector Reduction Operations --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005281603657.0977633141205; Tue, 22 Mar 2022 20:14:41 -0700 (PDT) Received: from localhost ([::1]:33296 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrSK-0002oU-AZ for importer@patchew.org; Tue, 22 Mar 2022 23:14:40 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48748) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMq-0002fD-35; Tue, 22 Mar 2022 23:09:00 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36424) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrMo-00039n-Ls; Tue, 22 Mar 2022 23:08:59 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 00E9211F103; Wed, 23 Mar 2022 03:08:53 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 04:21:37 -0800 Subject: [PATCH qemu v2 11/13] target/riscv: rvv: Add tail agnostic for vector reduction instructions Message-ID: <164800493107.31817.14053311036718644936-11@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005282643100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/vector_helper.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 697c74e8cc..2a8ecf40f5 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4543,6 +4543,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ { \ uint32_t vm =3D vext_vm(desc); \ uint32_t vl =3D env->vl; \ + uint32_t esz =3D sizeof(TD); \ + uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ TD s1 =3D *((TD *)vs1 + HD(0)); \ \ @@ -4555,6 +4558,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ } \ *((TD *)vd + HD(0)) =3D s1; \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, \ + vlenb); \ } =20 /* vd[0] =3D sum(vs1[0], vs2[*]) */ @@ -4660,6 +4666,9 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void = *vs1, { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t esz =3D sizeof(uint32_t); + uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; + uint32_t vta =3D vext_vta(desc); uint32_t i; uint32_t s1 =3D *((uint32_t *)vs1 + H4(0)); =20 @@ -4673,6 +4682,8 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void = *vs1, } *((uint32_t *)vd + H4(0)) =3D s1; env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, vlenb); } =20 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, @@ -4680,6 +4691,9 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void = *vs1, { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t esz =3D sizeof(uint64_t); + uint32_t vlenb =3D env_archcpu(env)->cfg.vlen >> 3; + uint32_t vta =3D vext_vta(desc); uint32_t i; uint64_t s1 =3D *((uint64_t *)vs1); =20 @@ -4693,6 +4707,8 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void = *vs1, } *((uint64_t *)vd) =3D s1; env->vstart =3D 0; + /* set tail elements to 1s */ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, vlenb); } =20 /* --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005566046938.120241807383; Tue, 22 Mar 2022 20:19:26 -0700 (PDT) Received: from localhost ([::1]:45460 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrWv-0002bV-09 for importer@patchew.org; Tue, 22 Mar 2022 23:19:25 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48828) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrNA-0003tL-K2; Tue, 22 Mar 2022 23:09:20 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36426) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrN8-00039p-OP; Tue, 22 Mar 2022 23:09:20 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 2C29D11F104; Wed, 23 Mar 2022 03:08:53 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 07:26:05 -0800 Subject: [PATCH qemu v2 12/13] target/riscv: rvv: Add tail agnostic for vector mask instructions Message-ID: <164800493107.31817.14053311036718644936-12@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005566495100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 4 ++++ target/riscv/vector_helper.c | 27 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index 644309d23b..cce811f67f 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3119,6 +3119,7 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) = \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -3223,6 +3224,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)= \ \ data =3D FIELD_DP32(data, VDATA, VM, a->vm); \ data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ cpu_env, s->cfg_ptr->vlen / 8, \ @@ -3261,6 +3263,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_= m *a) =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_3_ptr * const fns[4] =3D { gen_helper_viota_m_b, gen_helper_viota_m_h, gen_helper_viota_m_w, gen_helper_viota_m_d, @@ -3290,6 +3293,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] =3D { gen_helper_vid_v_b, gen_helper_vid_v_h, gen_helper_vid_v_w, gen_helper_vid_v_d, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 2a8ecf40f5..8ebd26be93 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4721,6 +4721,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ uint32_t desc) \ { \ uint32_t vl =3D env->vl; \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype);\ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ int a, b; \ \ @@ -4730,6 +4733,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, = \ vext_set_elem_mask(vd, i, OP(b, a)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + if (vta) { \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } =20 #define DO_NAND(N, M) (!(N & M)) @@ -4797,6 +4806,8 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPU= RISCVState *env, { uint32_t vm =3D vext_vm(desc); uint32_t vl =3D env->vl; + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); + uint32_t vta =3D vext_vta(desc); int i; bool first_mask_bit =3D false; =20 @@ -4825,6 +4836,12 @@ static void vmsetm(void *vd, void *v0, void *vs2, CP= URISCVState *env, } } env->vstart =3D 0; + /* set tail elements to 1s */ + if (vta) { + for (; i < vlmax; i++) { + vext_set_elem_mask(vd, i, 1); + } + } } =20 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, @@ -4852,6 +4869,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPUR= ISCVState *env, \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t sum =3D 0; = \ int i; \ \ @@ -4865,6 +4885,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPUR= ISCVState *env, \ } \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) @@ -4878,6 +4900,9 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *= env, uint32_t desc) \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ int i; \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -4887,6 +4912,8 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *= env, uint32_t desc) \ *((ETYPE *)vd + H(i)) =3D i; = \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) --=20 2.34.1 From nobody Mon Feb 9 11:34:13 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=git.sr.ht Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1648005133668250.4700809233923; Tue, 22 Mar 2022 20:12:13 -0700 (PDT) Received: from localhost ([::1]:55708 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nWrPw-0007Mm-JX for importer@patchew.org; Tue, 22 Mar 2022 23:12:12 -0400 Received: from eggs.gnu.org ([209.51.188.92]:48830) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrNA-0003u3-Q7; Tue, 22 Mar 2022 23:09:20 -0400 Received: from mail-b.sr.ht ([173.195.146.151]:36428) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nWrN8-00039x-Sx; Tue, 22 Mar 2022 23:09:20 -0400 Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 4AE0611F106; Wed, 23 Mar 2022 03:08:53 +0000 (UTC) Authentication-Results: mail-b.sr.ht; dkim=none From: ~eopxd Date: Mon, 07 Mar 2022 07:59:26 -0800 Subject: [PATCH qemu v2 13/13] target/riscv: rvv: Add tail agnostic for vector permutation instructions Message-ID: <164800493107.31817.14053311036718644936-13@git.sr.ht> X-Mailer: git.sr.ht In-Reply-To: <164800493107.31817.14053311036718644936-0@git.sr.ht> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=173.195.146.151; envelope-from=outgoing@sr.ht; helo=mail-b.sr.ht X-Spam_score_int: 36 X-Spam_score: 3.6 X-Spam_bar: +++ X-Spam_report: (3.6 / 5.0 requ) BAYES_00=-1.9, DATE_IN_PAST_96_XX=3.405, FREEMAIL_FORGED_REPLYTO=2.095, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=no autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: ~eopxd Cc: WeiWei Li , Frank Chang , eop Chen , Bin Meng , Alistair Francis , Palmer Dabbelt Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1648005135774100001 From: eopXD Signed-off-by: eop Chen Reviewed-by: Frank Chang --- target/riscv/insn_trans/trans_rvv.c.inc | 2 + target/riscv/vector_helper.c | 54 ++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_tr= ans/trans_rvv.c.inc index cce811f67f..92ee0641ba 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3739,6 +3739,7 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r= *a) tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); =20 data =3D FIELD_DP32(data, VDATA, LMUL, s->lmul); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -3844,6 +3845,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, u= int8_t seq) } =20 data =3D FIELD_DP32(data, VDATA, VM, a->vm); + data =3D FIELD_DP32(data, VDATA, VTA, s->vta); =20 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 8ebd26be93..595cfa3e0e 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4932,6 +4932,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ { \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ target_ulong offset =3D s1, i_min, i; = \ \ i_min =3D MAX(env->vstart, offset); = \ @@ -4941,6 +4944,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ } \ *((ETYPE *)vd + H(i)) =3D *((ETYPE *)vs2 + H(i - offset)); = \ } \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] =3D vs2[i] */ @@ -4953,12 +4958,15 @@ GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax =3D vext_max_elems(desc, ctzl(sizeof(ETYPE))); = \ + uint32_t max_elem =3D vext_max_elems(desc, ctzl(sizeof(ETYPE))); = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ target_ulong i_max, i; \ \ - i_max =3D MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); = \ + i_max =3D MAX(MIN(s1 < max_elem ? max_elem - s1 : 0, vl), env->vstart)= ; \ for (i =3D env->vstart; i < i_max; ++i) { = \ if (vm || vext_elem_mask(v0, i)) { \ *((ETYPE *)vd + H(i)) =3D *((ETYPE *)vs2 + H(i + s1)); = \ @@ -4972,6 +4980,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1= , void *vs2, \ } \ \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] =3D vs2[i+rs1] */ @@ -4987,6 +4997,9 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, = target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; = \ = \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -5000,6 +5013,8 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, = target_ulong s1, \ } = \ } = \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ = \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); = \ } =20 GEN_VEXT_VSLIE1UP(8, H1) @@ -5027,6 +5042,9 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0= , target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t i; = \ = \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -5040,6 +5058,8 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0= , target_ulong s1, \ } = \ } = \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ = \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); = \ } =20 GEN_VEXT_VSLIDE1DOWN(8, H1) @@ -5090,9 +5110,12 @@ GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax =3D vext_max_elems(desc, ctzl(sizeof(TS2))); = \ + uint32_t max_elem =3D vext_max_elems(desc, ctzl(sizeof(TS2))); = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(TS2); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint64_t index; \ uint32_t i; \ \ @@ -5101,13 +5124,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, vo= id *vs2, \ continue; \ } \ index =3D *((TS1 *)vs1 + HS1(i)); = \ - if (index >=3D vlmax) { = \ + if (index >=3D max_elem) { = \ *((TS2 *)vd + HS2(i)) =3D 0; = \ } else { \ *((TS2 *)vd + HS2(i)) =3D *((TS2 *)vs2 + HS2(index)); = \ } \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vd[i] =3D (vs1[i] >=3D VLMAX) ? 0 : vs2[vs1[i]]; */ @@ -5125,9 +5150,12 @@ GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, ui= nt64_t, H2, H8) void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax =3D vext_max_elems(desc, ctzl(sizeof(ETYPE))); = \ + uint32_t max_elem =3D vext_max_elems(desc, ctzl(sizeof(ETYPE))); = \ uint32_t vm =3D vext_vm(desc); = \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint64_t index =3D s1; = \ uint32_t i; \ \ @@ -5135,13 +5163,15 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong = s1, void *vs2, \ if (!vm && !vext_elem_mask(v0, i)) { \ continue; \ } \ - if (index >=3D vlmax) { = \ + if (index >=3D max_elem) { = \ *((ETYPE *)vd + H(i)) =3D 0; = \ } else { \ *((ETYPE *)vd + H(i)) =3D *((ETYPE *)vs2 + H(index)); = \ } \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* vd[i] =3D (x[rs1] >=3D VLMAX) ? 0 : vs2[rs1] */ @@ -5156,6 +5186,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl =3D env->vl; = \ + uint32_t esz =3D sizeof(ETYPE); = \ + uint32_t vlmax =3D vext_get_total_elem(env_archcpu(env), env->vtype); = \ + uint32_t vta =3D vext_vta(desc); = \ uint32_t num =3D 0, i; = \ \ for (i =3D env->vstart; i < vl; i++) { = \ @@ -5166,6 +5199,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void= *vs2, \ num++; \ } \ env->vstart =3D 0; = \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, vlmax * esz); \ } =20 /* Compress into vd elements of vs2 where vs1 is enabled */ @@ -5202,6 +5237,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ { \ uint32_t vl =3D env->vl; \ uint32_t vm =3D vext_vm(desc); \ + uint32_t esz =3D sizeof(ETYPE); \ + uint32_t vlmax =3D \ + vext_get_total_elem(env_archcpu(env), env->vtype); \ + uint32_t vta =3D vext_vta(desc); \ uint32_t i; \ \ for (i =3D env->vstart; i < vl; i++) { \ @@ -5211,6 +5250,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, = \ *((ETYPE *)vd + HD(i)) =3D *((DTYPE *)vs2 + HS1(i)); \ } \ env->vstart =3D 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s_fns[ctzl(esz)](vd, vta, vl, vl * esz, \ + vlmax * esz); \ } =20 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) --=20 2.34.1