Adds new functions to the gvec API for truncating, sign-extending or
zero-extending vector elements. Currently implemented as helper functions,
these may be mapped onto host vector instructions in the future.
For the time being, allows translation of more complicated vector
instructions by helper-to-tcg.
Signed-off-by: Anton Johansson <anjo@rev.ng>
---
accel/tcg/tcg-runtime-gvec.c | 41 +++++++++++++++++
accel/tcg/tcg-runtime.h | 22 +++++++++
include/tcg/tcg-op-gvec-common.h | 18 ++++++++
tcg/tcg-op-gvec.c | 78 ++++++++++++++++++++++++++++++++
4 files changed, 159 insertions(+)
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index afca89baa1..685c991e6a 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -1569,3 +1569,44 @@ void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
}
clear_high(d, oprsz, desc);
}
+
+/* Define HELPER(NAME): copy a to d, converting each SRCTY element to DSTTY. */
+#define DO_SZ_OP1(NAME, DSTTY, SRCTY)                                   \
+void HELPER(NAME)(void *d, void *a, uint32_t desc)                      \
+{                                                                       \
+    intptr_t opr_sz = simd_oprsz(desc);                                 \
+    intptr_t n = opr_sz / sizeof(DSTTY); /* count in DSTTY units */     \
+    DSTTY *dst = d;                                                     \
+    SRCTY *src = a;                                                     \
+    for (intptr_t k = 0; k < n; ++k) {                                  \
+        dst[k] = src[k]; /* converts per C integer rules */             \
+    }                                                                   \
+    clear_high(d, opr_sz, desc);                                        \
+}
+
+#define DO_SZ_OP2(NAME, INTTY, DSTSZ, SRCSZ) /* NAME<SRCSZ>_<DSTSZ> */ \
+ DO_SZ_OP1(NAME##SRCSZ##_##DSTSZ, INTTY##DSTSZ##_t, INTTY##SRCSZ##_t)
+/* Truncation: unsigned narrowing keeps the low DSTSZ bits of each element. */
+DO_SZ_OP2(gvec_trunc, uint, 32, 64)
+DO_SZ_OP2(gvec_trunc, uint, 16, 64)
+DO_SZ_OP2(gvec_trunc, uint, 8, 64)
+DO_SZ_OP2(gvec_trunc, uint, 16, 32)
+DO_SZ_OP2(gvec_trunc, uint, 8, 32)
+DO_SZ_OP2(gvec_trunc, uint, 8, 16)
+/* Zero extension: unsigned widening from SRCSZ to DSTSZ bits. */
+DO_SZ_OP2(gvec_zext, uint, 64, 32)
+DO_SZ_OP2(gvec_zext, uint, 64, 16)
+DO_SZ_OP2(gvec_zext, uint, 64, 8)
+DO_SZ_OP2(gvec_zext, uint, 32, 16)
+DO_SZ_OP2(gvec_zext, uint, 32, 8)
+DO_SZ_OP2(gvec_zext, uint, 16, 8)
+/* Sign extension: signed widening from SRCSZ to DSTSZ bits. */
+DO_SZ_OP2(gvec_sext, int, 64, 32)
+DO_SZ_OP2(gvec_sext, int, 64, 16)
+DO_SZ_OP2(gvec_sext, int, 64, 8)
+DO_SZ_OP2(gvec_sext, int, 32, 16)
+DO_SZ_OP2(gvec_sext, int, 32, 8)
+DO_SZ_OP2(gvec_sext, int, 16, 8)
+
+#undef DO_SZ_OP1
+#undef DO_SZ_OP2
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 0a4d31eb48..5045655bf8 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -1,3 +1,4 @@
+#include "tcg/tcg.h"
DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
DEF_HELPER_FLAGS_2(rem_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
DEF_HELPER_FLAGS_2(divu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
@@ -328,3 +329,24 @@ DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+/* Element truncation helpers, named gvec_trunc<src bits>_<dst bits>. */
+DEF_HELPER_FLAGS_3(gvec_trunc64_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc64_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc64_8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc32_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc32_8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc16_8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+/* Element zero-extension helpers, named gvec_zext<src bits>_<dst bits>. */
+DEF_HELPER_FLAGS_3(gvec_zext32_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext16_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext8_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext16_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext8_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext8_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+/* Element sign-extension helpers, named gvec_sext<src bits>_<dst bits>. */
+DEF_HELPER_FLAGS_3(gvec_sext32_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext16_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext8_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext16_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext8_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext8_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h
index 65553f5f97..39b0c2f64e 100644
--- a/include/tcg/tcg-op-gvec-common.h
+++ b/include/tcg/tcg-op-gvec-common.h
@@ -390,6 +390,24 @@ void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t cofs,
uint32_t oprsz, uint32_t maxsz);
+/*
+ * Truncate, zero-extend or sign-extend each element of size vecse (source,
+ * aofs) into an element of size vecde (destination, dofs); both are MO_* sizes.
+ */
+void tcg_gen_gvec_trunc(unsigned vecde, unsigned vecse,
+ uint32_t dofs, uint32_t aofs,
+ uint32_t doprsz, uint32_t aoprsz,
+ uint32_t maxsz);
+
+void tcg_gen_gvec_zext(unsigned vecde, unsigned vecse,
+ uint32_t dofs, uint32_t aofs,
+ uint32_t doprsz, uint32_t aoprsz,
+ uint32_t maxsz);
+
+void tcg_gen_gvec_sext(unsigned vecde, unsigned vecse,
+ uint32_t dofs, uint32_t aofs,
+ uint32_t doprsz, uint32_t aoprsz,
+ uint32_t maxsz);
/*
* 64-bit vector operations. Use these when the register has been allocated
* with tcg_global_mem_new_i64, and so we cannot also address it via pointer.
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 97e4df221a..80649dc0d2 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -4008,3 +4008,81 @@ void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_gen_gvec_4(dofs, aofs, bofs, cofs, oprsz, maxsz, &g);
}
+
+/* Narrow each vecse-sized element at aofs to a vecde-sized element at dofs. */
+void tcg_gen_gvec_trunc(unsigned vecde, unsigned vecse,
+                        uint32_t dofs, uint32_t aofs,
+                        uint32_t doprsz, uint32_t aoprsz,
+                        uint32_t maxsz)
+{
+    static gen_helper_gvec_2 * const fns[4][4] = { /* [vecse][vecde] */
+        [MO_64] = {
+            [MO_32] = gen_helper_gvec_trunc64_32,
+            [MO_16] = gen_helper_gvec_trunc64_16,
+            [MO_8] = gen_helper_gvec_trunc64_8,
+        },
+        [MO_32] = {
+            [MO_16] = gen_helper_gvec_trunc32_16,
+            [MO_8] = gen_helper_gvec_trunc32_8,
+        },
+        [MO_16] = { [MO_8] = gen_helper_gvec_trunc16_8 },
+    };
+    gen_helper_gvec_2 *fn;
+
+    tcg_debug_assert(vecde < vecse && vecse <= MO_64); /* before indexing */
+    fn = fns[vecse][vecde];
+    tcg_debug_assert(fn != NULL);
+    tcg_gen_gvec_2_ool(dofs, aofs, doprsz, maxsz, 0, fn); /* aoprsz unused */
+}
+
+/* Zero-extend each vecse-sized element at aofs to a vecde-sized one at dofs. */
+void tcg_gen_gvec_zext(unsigned vecde, unsigned vecse,
+                       uint32_t dofs, uint32_t aofs,
+                       uint32_t doprsz, uint32_t aoprsz,
+                       uint32_t maxsz)
+{
+    static gen_helper_gvec_2 * const fns[4][4] = { /* [vecse][vecde] */
+        [MO_8] = {
+            [MO_16] = gen_helper_gvec_zext8_16,
+            [MO_32] = gen_helper_gvec_zext8_32,
+            [MO_64] = gen_helper_gvec_zext8_64,
+        },
+        [MO_16] = {
+            [MO_32] = gen_helper_gvec_zext16_32,
+            [MO_64] = gen_helper_gvec_zext16_64,
+        },
+        [MO_32] = { [MO_64] = gen_helper_gvec_zext32_64 },
+    };
+    gen_helper_gvec_2 *fn;
+
+    tcg_debug_assert(vecse < vecde && vecde <= MO_64); /* before indexing */
+    fn = fns[vecse][vecde];
+    tcg_debug_assert(fn != NULL);
+    tcg_gen_gvec_2_ool(dofs, aofs, doprsz, maxsz, 0, fn); /* aoprsz unused */
+}
+
+/* Sign-extend each vecse-sized element at aofs to a vecde-sized one at dofs. */
+void tcg_gen_gvec_sext(unsigned vecde, unsigned vecse,
+                       uint32_t dofs, uint32_t aofs,
+                       uint32_t doprsz, uint32_t aoprsz,
+                       uint32_t maxsz)
+{
+    static gen_helper_gvec_2 * const fns[4][4] = { /* [vecse][vecde] */
+        [MO_8] = {
+            [MO_16] = gen_helper_gvec_sext8_16,
+            [MO_32] = gen_helper_gvec_sext8_32,
+            [MO_64] = gen_helper_gvec_sext8_64,
+        },
+        [MO_16] = {
+            [MO_32] = gen_helper_gvec_sext16_32,
+            [MO_64] = gen_helper_gvec_sext16_64,
+        },
+        [MO_32] = { [MO_64] = gen_helper_gvec_sext32_64 },
+    };
+    gen_helper_gvec_2 *fn;
+
+    tcg_debug_assert(vecse < vecde && vecde <= MO_64); /* before indexing */
+    fn = fns[vecse][vecde];
+    tcg_debug_assert(fn != NULL);
+    tcg_gen_gvec_2_ool(dofs, aofs, doprsz, maxsz, 0, fn); /* aoprsz unused */
+}
--
2.45.2