From: eopXD <eop.chen@sifive.com>
Vector integer compare instructions write mask registers, and so always
operate under a tail-agnostic policy.
Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
---
target/riscv/vector_helper.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 8755671449..6356b6b0ef 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1345,6 +1345,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, esz); \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1356,6 +1360,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
} \
env->vstart = 0; \
+ /* mask destination registers are always tail-agnostic */ \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
@@ -1394,6 +1405,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1405,6 +1419,13 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
DO_OP(s2, (ETYPE)(target_long)s1)); \
} \
env->vstart = 0; \
+ /* mask destination registers are always tail-agnostic */ \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
--
2.34.2