target/arm/helper-sve.h | 10 +++++++++
target/arm/sve.decode | 5 +++++
target/arm/sve_helper.c | 29 +++++++++++++++++++++++++
target/arm/translate-sve.c | 43 ++++++++++++++++++++++++++++++++++++++
4 files changed, 87 insertions(+)
Signed-off-by: Stephen Long <steplong@quicinc.com>
---
Submitting this for early review. I'm working with Richard on SVE2 support for
QEMU. Next, I'll be attempting to tackle the instructions in the 'SVE2 integer
add/subtract narrow high part' category [1].
[1] ISA manual: https://static.docs.arm.com/ddi0602/d/ISA_A64_xml_futureA-2019-12_OPT.pdf (page 2950)
target/arm/helper-sve.h | 10 +++++++++
target/arm/sve.decode | 5 +++++
target/arm/sve_helper.c | 29 +++++++++++++++++++++++++
target/arm/translate-sve.c | 43 ++++++++++++++++++++++++++++++++++++++
4 files changed, 87 insertions(+)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 5dd880cf6d..2077df9a95 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2516,6 +2516,16 @@ DEF_HELPER_FLAGS_3(sve2_uqrshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqrshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqrshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+/*
+ * SVE2 character match helpers (MATCH / NMATCH).  The _b variants work on
+ * byte elements and the _h variants on halfword elements.  The pointer
+ * arguments are, in order, vd, vn, vm and vg, followed by the simd
+ * descriptor; nothing is returned.
+ */
+DEF_HELPER_FLAGS_5(sve2_match_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_match_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_nmatch_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_nmatch_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 374e47fb05..652668df02 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1305,6 +1305,11 @@ UQSHRNT 01000101 .. 1 ..... 00 1101 ..... ..... @rd_rn_tszimm_shr
UQRSHRNB 01000101 .. 1 ..... 00 1110 ..... ..... @rd_rn_tszimm_shr
UQRSHRNT 01000101 .. 1 ..... 00 1111 ..... ..... @rd_rn_tszimm_shr
+### SVE2 Character Match
+
+MATCH 01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
+NMATCH 01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
+
## SVE2 floating-point pairwise operations
FADDP 01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index b68f62cd7f..c75258b56d 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -6890,3 +6890,32 @@ DO_ST1_ZPZ_D(dd_be, zd, MO_64)
#undef DO_ST1_ZPZ_S
#undef DO_ST1_ZPZ_D
+
+/*
+ * SVE2 character match (MATCH / NMATCH).
+ *
+ * The vector is processed one 16-byte segment at a time.  For every
+ * active element of vn, the same segment of vm is searched for an equal
+ * element.  The destination predicate bit is set when the outcome of that
+ * search (found / not found) equals EQUALS: "found" for MATCH, "not found"
+ * for NMATCH.  Inactive elements always produce 0.
+ *
+ * A predicate carries one bit per vector byte, so the 16 bits loaded at
+ * H1_2(seg >> 3) govern exactly one segment.  Only the bit that belongs to
+ * the first byte of each element is significant: every bit for bytes,
+ * every even bit for halfwords.  esz_mask selects those bits, which also
+ * keeps the odd result bits zero for halfwords.
+ *
+ * vd: destination predicate; vn, vm: source vectors; vg: governing
+ * predicate; desc: simd descriptor giving the vector size in bytes.
+ */
+#define DO_ZPZZ_CHAR_MATCH(NAME, TYPE, H, EQUALS)                            \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)    \
+{                                                                           \
+    const uint16_t esz_mask = sizeof(TYPE) == 1 ? 0xffff : 0x5555;          \
+    intptr_t seg, i, j, opr_sz = simd_oprsz(desc);                          \
+    for (seg = 0; seg < opr_sz; seg += 16) {                                \
+        /* Load the governing bits before storing: vd may alias vg. */      \
+        uint16_t pg = *(uint16_t *)(vg + H1_2(seg >> 3)) & esz_mask;        \
+        uint16_t out = 0;                                                   \
+        for (i = 0; i < 16; i += sizeof(TYPE)) {                            \
+            if (pg & (1 << i)) {                                            \
+                TYPE nn = *(TYPE *)(vn + H(seg + i));                       \
+                bool found = false;                                         \
+                /* All elements of the segment are compared, so their */    \
+                /* order does not matter and H() is not needed for vm. */   \
+                for (j = 0; j < 16; j += sizeof(TYPE)) {                    \
+                    found |= nn == *(TYPE *)(vm + seg + j);                 \
+                }                                                           \
+                out |= (uint16_t)((found == (EQUALS)) << i);                \
+            }                                                               \
+        }                                                                   \
+        *(uint16_t *)(vd + H1_2(seg >> 3)) = out;                           \
+    }                                                                       \
+}
+
+DO_ZPZZ_CHAR_MATCH(sve2_match_zpzz_b, uint8_t, H1, true)
+DO_ZPZZ_CHAR_MATCH(sve2_match_zpzz_h, uint16_t, H1_2, true)
+
+DO_ZPZZ_CHAR_MATCH(sve2_nmatch_zpzz_b, uint8_t, H1, false)
+DO_ZPZZ_CHAR_MATCH(sve2_nmatch_zpzz_h, uint16_t, H1_2, false)
+
+#undef DO_ZPZZ_CHAR_MATCH
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 07a2040208..7175148bfd 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7246,6 +7246,49 @@ static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
return do_sve2_shr_narrow(s, a, ops);
}
+/*
+ * Emit code for an SVE2 character-match instruction (MATCH / NMATCH).
+ *
+ * s:  translation context.
+ * a:  decoded operands: rd and pg are predicate registers, rn and rm are
+ *     vector registers.
+ * fn: out-of-line helper for the element size, or NULL when the element
+ *     size has no helper.
+ *
+ * Returns false when the instruction must be treated as unallocated
+ * (SVE2 not implemented, or no helper); true otherwise, including the
+ * case where sve_access_check() fails and no operation code is emitted
+ * here.
+ */
+static bool do_sve2_zpzz_char_match(DisasContext *s, arg_rprr_esz *a,
+                                    gen_helper_gvec_4 *fn)
+{
+    if (!dc_isar_feature(aa64_sve2, s) || fn == NULL) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        /*
+         * The predicate copy and the predicate test below work on whole
+         * 64-bit words, so use the gvec-rounded predicate size rather than
+         * the exact one, which can be smaller than 8 bytes.
+         */
+        unsigned psz = pred_gvec_reg_size(s);
+        int dofs = pred_full_reg_offset(s, a->rd);
+        int nofs = vec_full_reg_offset(s, a->rn);
+        int mofs = vec_full_reg_offset(s, a->rm);
+        int gofs = pred_full_reg_offset(s, a->pg);
+
+        /*
+         * The flags depend on the original governing predicate.  Save a
+         * copy if the destination overwrites it.
+         */
+        int tofs = gofs;
+        if (a->rd == a->pg) {
+            tofs = offsetof(CPUARMState, vfp.preg_tmp);
+            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
+        }
+
+        /* The helper iterates over the vector, hence vsz and not psz. */
+        tcg_gen_gvec_4_ool(dofs, nofs, mofs, gofs, vsz, vsz, 0, fn);
+        do_predtest(s, dofs, tofs, psz / 8);
+    }
+    return true;
+}
+
+/*
+ * Define trans_NAME for a character-match instruction.  The helper is
+ * picked from a table indexed by a->esz; entries 2 and 3 are NULL, which
+ * makes do_sve2_zpzz_char_match() reject those element sizes.
+ */
+#define DO_SVE2_ZPZZ_CHAR_MATCH(NAME, name)                                  \
+static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
+{                                                                           \
+    static gen_helper_gvec_4 * const helpers[4] = {                         \
+        gen_helper_sve2_##name##_zpzz_b,                                    \
+        gen_helper_sve2_##name##_zpzz_h,                                    \
+        NULL,                                                               \
+        NULL,                                                               \
+    };                                                                      \
+    gen_helper_gvec_4 *selected = helpers[a->esz];                          \
+    return do_sve2_zpzz_char_match(s, a, selected);                         \
+}
+
+DO_SVE2_ZPZZ_CHAR_MATCH(MATCH, match)
+DO_SVE2_ZPZZ_CHAR_MATCH(NMATCH, nmatch)
+
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
gen_helper_gvec_4_ptr *fn)
{
--
2.17.1
On 4/13/20 4:42 PM, Stephen Long wrote:
> +#define DO_ZPZZ_CHAR_MATCH(NAME, TYPE, H, EQUALS) \
> +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
> +{ \
> + intptr_t i, opr_sz = simd_oprsz(desc); \
> + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
> + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
> + uint16_t *pd = (uint16_t *)(vd + H1_2(i >> 3)); \
> + *pd = (*pd & ~1) | ((0 & EQUALS) | (1 & !EQUALS)); \
> + if (pg & 1) { \

The important error here is that the predicate is not always the low bit. When operating on bytes, every bit of the predicate is significant. When operating on halfwords, every even bit of the predicate is significant. In addition, when operating on halfwords, every odd bit of the result predicate must be zero.

Which is why, generally, I have constructed the output predicate as we go. See, for instance, DO_CMP_PPZZ.

> + TYPE nn = *(TYPE *)(vn + H(i)); \
> + for (intptr_t j = 0; j < 16; j += sizeof(TYPE)) { \
> + TYPE mm = *(TYPE *)(vm + H(i * 16 + j)); \

mm needs to start at the beginning of the segment, which in this case is (i & -16). You don't need the elements of mm in any particular order (all of them are significant), so you can drop the use of H() here. Therefore the indexing for mm should be vm + (i & -16) + j.

> + bool eq = nn == mm; \
> + if ((eq && EQUALS) || (!eq && !EQUALS)) { \
> + *pd = (*pd & ~1) | ((1 & EQUALS) | (0 & !EQUALS)); \
> + } \

It might be handy to split out the inner loop to a helper function, as, while the basic loop is ok, there are tricks that can improve it, so that we're comparing 8 bytes at a time.

> +static bool do_sve2_zpzz_char_match(DisasContext *s, arg_rprr_esz *a,
> + gen_helper_gvec_4 *fn)
> +{
> + if (!dc_isar_feature(aa64_sve2, s)) {
> + return false;
> + }
> + if (fn == NULL) {
> + return false;
> + }
> + if (sve_access_check(s)) {
> + unsigned vsz = vec_full_reg_size(s);
> + unsigned psz = pred_full_reg_size(s);
> + int dofs = pred_full_reg_offset(s, a->rd);
> + int nofs = vec_full_reg_offset(s, a->rn);
> + int mofs = vec_full_reg_offset(s, a->rm);
> + int gofs = pred_full_reg_offset(s, a->pg);
> +
> + /* Save a copy if the destination overwrites the guarding predicate */
> + int tofs = gofs;
> + if (a->rd == a->pg) {
> + tofs = offsetof(CPUARMState, vfp.preg_tmp);
> + tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
> + }
> +
> + tcg_gen_gvec_4_ool(dofs, nofs, mofs, gofs, vsz, vsz, 0, fn);
> + do_predtest(s, dofs, tofs, psz / 8);

You can avoid the copy and the predtest by using the iter_predtest_* functions and returning the flags result directly from the helper. Again, see DO_CMP_PPZZ.

r~
© 2016 - 2024 Red Hat, Inc.