target/arm/helper-sve.h | 29 +++++++++++++++++++++++++++++ target/arm/sve.decode | 7 +++++++ target/arm/sve_helper.c | 30 ++++++++++++++++++++++++++++-- target/arm/translate-sve.c | 19 +++++++++++++++++++ 4 files changed, 83 insertions(+), 2 deletions(-)
Implements SQSHL/UQSHL, SRSHR/URSHR, and SQSHLU
Signed-off-by: Stephen Long <steplong@quicinc.com>
---
I'm not too sure about the casting I'm doing willy nilly in the helper
functions, but I think they should be ok. Also, there might be some avenues
for optimization when translating similar to asr_zpzi and lsl_zpzi.
target/arm/helper-sve.h | 29 +++++++++++++++++++++++++++++
target/arm/sve.decode | 7 +++++++
target/arm/sve_helper.c | 30 ++++++++++++++++++++++++++++--
target/arm/translate-sve.c | 19 +++++++++++++++++++
4 files changed, 83 insertions(+), 2 deletions(-)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 0a62eef94e..c71f3e1f39 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2731,3 +2731,32 @@ DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_idx_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_srshr_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_srshr_zpzi_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_srshr_zpzi_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_srshr_zpzi_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_urshr_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_urshr_zpzi_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_urshr_zpzi_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_urshr_zpzi_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqshlu_zpzi_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshlu_zpzi_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshlu_zpzi_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshlu_zpzi_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 3cf824bac5..a9cfbc80ab 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1568,3 +1568,10 @@ SM4E 01000101 00 10001 1 11100 0 ..... ..... @rdn_rm_e0
# SVE2 crypto constructive binary operations
SM4EKEY 01000101 00 1 ..... 11110 0 ..... ..... @rd_rn_rm_e0
RAX1 01000101 00 1 ..... 11110 1 ..... ..... @rd_rn_rm_e0
+
+### SVE2 bitwise shift by immediate (predicated)
+SQSHL_zpzi 00000100 .. 000 110 100 ... .. ... ..... @rdn_pg_tszimm_shl
+UQSHL_zpzi 00000100 .. 000 111 100 ... .. ... ..... @rdn_pg_tszimm_shl
+SRSHR_zpzi 00000100 .. 001 100 100 ... .. ... ..... @rdn_pg_tszimm_shr
+URSHR_zpzi 00000100 .. 001 101 100 ... .. ... ..... @rdn_pg_tszimm_shr
+SQSHLU_zpzi 00000100 .. 001 111 100 ... .. ... ..... @rdn_pg_tszimm_shl
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index aa94df302a..1982d29a1e 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -2218,6 +2218,34 @@ DO_ZPZI(sve_asrd_h, int16_t, H1_2, DO_ASRD)
DO_ZPZI(sve_asrd_s, int32_t, H1_4, DO_ASRD)
DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD)
+#define DO_RSHR(x, sh) ((x >> sh) + ((x >> (sh - 1)) & 1))
+
+/* SVE2 bitwise shift by immediate */
+DO_ZPZI(sve2_sqshl_zpzi_b, int8_t, H1, do_sqshl_b)
+DO_ZPZI(sve2_sqshl_zpzi_h, int16_t, H1_2, do_sqshl_h)
+DO_ZPZI(sve2_sqshl_zpzi_s, int32_t, H1_4, do_sqshl_s)
+DO_ZPZI_D(sve2_sqshl_zpzi_d, int64_t, do_sqshl_d)
+
+DO_ZPZI(sve2_uqshl_zpzi_b, uint8_t, H1, do_uqshl_b)
+DO_ZPZI(sve2_uqshl_zpzi_h, uint16_t, H1_2, do_uqshl_h)
+DO_ZPZI(sve2_uqshl_zpzi_s, uint32_t, H1_4, do_uqshl_s)
+DO_ZPZI_D(sve2_uqshl_zpzi_d, uint64_t, do_uqshl_d)
+
+DO_ZPZI(sve2_srshr_zpzi_b, int8_t, H1, DO_RSHR)
+DO_ZPZI(sve2_srshr_zpzi_h, int16_t, H1_2, DO_RSHR)
+DO_ZPZI(sve2_srshr_zpzi_s, int32_t, H1_4, DO_RSHR)
+DO_ZPZI_D(sve2_srshr_zpzi_d, int64_t, DO_RSHR)
+
+DO_ZPZI(sve2_urshr_zpzi_b, uint8_t, H1, DO_RSHR)
+DO_ZPZI(sve2_urshr_zpzi_h, uint16_t, H1_2, DO_RSHR)
+DO_ZPZI(sve2_urshr_zpzi_s, uint32_t, H1_4, DO_RSHR)
+DO_ZPZI_D(sve2_urshr_zpzi_d, uint64_t, DO_RSHR)
+
+DO_ZPZI(sve2_sqshlu_zpzi_b, int8_t, H1, do_uqshl_b)
+DO_ZPZI(sve2_sqshlu_zpzi_h, int16_t, H1_2, do_uqshl_h)
+DO_ZPZI(sve2_sqshlu_zpzi_s, int32_t, H1_4, do_uqshl_s)
+DO_ZPZI_D(sve2_sqshlu_zpzi_d, int64_t, do_uqshl_d)
+
#undef DO_ASRD
#undef DO_ZPZI
#undef DO_ZPZI_D
@@ -2252,8 +2280,6 @@ DO_SHRNT(sve2_shrnt_h, uint16_t, uint8_t, H1_2, H1, DO_SHR)
DO_SHRNT(sve2_shrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_SHR)
DO_SHRNT(sve2_shrnt_d, uint64_t, uint32_t, , H1_4, DO_SHR)
-#define DO_RSHR(x, sh) ((x >> sh) + ((x >> (sh - 1)) & 1))
-
DO_SHRNB(sve2_rshrnb_h, uint16_t, uint8_t, DO_RSHR)
DO_SHRNB(sve2_rshrnb_s, uint32_t, uint16_t, DO_RSHR)
DO_SHRNB(sve2_rshrnb_d, uint64_t, uint32_t, DO_RSHR)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index a8e57ea5f4..d74fcf4e75 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -8253,3 +8253,22 @@ static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
}
return true;
}
+
+#define DO_SVE2_ZPZI(NAME, name) \
+static bool trans_##NAME(DisasContext *s, arg_rpri_esz *a) \
+{ \
+ static gen_helper_gvec_3 * const fns[] = { \
+ gen_helper_sve2_##name##_b, gen_helper_sve2_##name##_h, \
+ gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d \
+ }; \
+ if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { \
+ return false; \
+ } \
+ return do_zpzi_ool(s, a, fns[a->esz]); \
+}
+
+DO_SVE2_ZPZI(SQSHL_zpzi, sqshl_zpzi)
+DO_SVE2_ZPZI(UQSHL_zpzi, uqshl_zpzi)
+DO_SVE2_ZPZI(SRSHR_zpzi, srshr_zpzi)
+DO_SVE2_ZPZI(URSHR_zpzi, urshr_zpzi)
+DO_SVE2_ZPZI(SQSHLU_zpzi, sqshlu_zpzi)
--
2.17.1
On 4/30/20 9:28 AM, Stephen Long wrote:
> Implements SQSHL/UQSHL, SRSHR/URSHR, and SQSHLU
>
> Signed-off-by: Stephen Long <steplong@quicinc.com>
> ---
>
> I'm not too sure about the casting I'm doing willy nilly in the helper
> functions, but I think they should be ok. Also, there might be some avenues
> for optimization when translating similar to asr_zpzi and lsl_zpzi.

No, the optimization would interfere with the saturation.

The casting is ok except for sqshlu. Negative values should saturate to 0,
but by passing them to the uqshl helper, the inputs get treated as large
unsigned values and saturate to UINT_MAX. You need to use do_suqrshl_bhs
for that.

r~
© 2016 - 2024 Red Hat, Inc.