Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-sme.h | 9 +++++
target/arm/tcg/sme_helper.c | 64 ++++++++++++++++++++++++++++++++++
target/arm/tcg/translate-sme.c | 56 +++++++++++++++++++++++++++++
target/arm/tcg/sme.decode | 37 ++++++++++++++++++++
4 files changed, 166 insertions(+)
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
index 858d69188f..8246ce774c 100644
--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
@@ -33,6 +33,15 @@ DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) /* cz: vector register -> vertical tile slice */
+DEF_HELPER_FLAGS_3(sme2_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) /* zc: vertical tile slice -> vector register */
+DEF_HELPER_FLAGS_3(sme2_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) /* _b/_h/_s/_d: 8/16/32/64-bit elements */
+DEF_HELPER_FLAGS_3(sme2_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) /* args: destination, source, simd descriptor */
+DEF_HELPER_FLAGS_3(sme2_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 6e212aec49..bae48aa7d6 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -206,6 +206,50 @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
#undef DO_MOVA_Z
+void HELPER(sme2_mova_zc_b)(void *vdst, void *vsrc, uint32_t desc) /* gather 8-bit elements from vsrc into contiguous vdst */
+{
+ const uint8_t *src = vsrc; /* indexed through tile_vslice_index() below */
+ uint8_t *dst = vdst; /* written contiguously */
+ size_t i, n = simd_oprsz(desc); /* 1-byte elements: count equals the operation size */
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)]; /* element i of the result comes from remapped source index */
+ }
+}
+
+void HELPER(sme2_mova_zc_h)(void *vdst, void *vsrc, uint32_t desc) /* gather 16-bit elements from vsrc into contiguous vdst */
+{
+ const uint16_t *src = vsrc; /* indexed through tile_vslice_index() below */
+ uint16_t *dst = vdst; /* written contiguously */
+ size_t i, n = simd_oprsz(desc) / 2; /* operation size divided by the 2-byte element size */
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)]; /* element i of the result comes from remapped source index */
+ }
+}
+
+void HELPER(sme2_mova_zc_s)(void *vdst, void *vsrc, uint32_t desc) /* gather 32-bit elements from vsrc into contiguous vdst */
+{
+ const uint32_t *src = vsrc; /* indexed through tile_vslice_index() below */
+ uint32_t *dst = vdst; /* written contiguously */
+ size_t i, n = simd_oprsz(desc) / 4; /* operation size divided by the 4-byte element size */
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)]; /* element i of the result comes from remapped source index */
+ }
+}
+
+void HELPER(sme2_mova_zc_d)(void *vdst, void *vsrc, uint32_t desc) /* gather 64-bit elements from vsrc into contiguous vdst */
+{
+ const uint64_t *src = vsrc; /* indexed through tile_vslice_index() below */
+ uint64_t *dst = vdst; /* written contiguously */
+ size_t i, n = simd_oprsz(desc) / 8; /* operation size divided by the 8-byte element size */
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)]; /* element i of the result comes from remapped source index */
+ }
+}
+
/*
* Clear elements in a tile slice comprising len bytes.
*/
@@ -314,6 +358,26 @@ static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
}
}
+void HELPER(sme2_mova_cz_b)(void *vdst, void *vsrc, uint32_t desc) /* helper entry point wrapping copy_vertical_b() */
+{
+ copy_vertical_b(vdst, vsrc, simd_oprsz(desc)); /* length argument is the operation size from desc */
+}
+
+void HELPER(sme2_mova_cz_h)(void *vdst, void *vsrc, uint32_t desc) /* helper entry point wrapping copy_vertical_h() */
+{
+ copy_vertical_h(vdst, vsrc, simd_oprsz(desc)); /* length argument is the operation size from desc */
+}
+
+void HELPER(sme2_mova_cz_s)(void *vdst, void *vsrc, uint32_t desc) /* helper entry point wrapping copy_vertical_s() */
+{
+ copy_vertical_s(vdst, vsrc, simd_oprsz(desc)); /* length argument is the operation size from desc */
+}
+
+void HELPER(sme2_mova_cz_d)(void *vdst, void *vsrc, uint32_t desc) /* helper entry point wrapping copy_vertical_d() */
+{
+ copy_vertical_d(vdst, vsrc, simd_oprsz(desc)); /* length argument is the operation size from desc */
+}
+
/*
* Host and TLB primitives for vertical tile slice addressing.
*/
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 908c3e8dd6..eed9345651 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -205,6 +205,62 @@ static bool do_mova_tile(DisasContext *s, arg_mova_p *a, bool to_vec)
TRANS_FEAT(MOVA_tz, aa64_sme, do_mova_tile, a, false)
TRANS_FEAT(MOVA_zt, aa64_sme, do_mova_tile, a, true)
+static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, bool to_vec) /* move n tile slices <-> n consecutive vector registers; to_vec selects tile -> vector */
+{
+ static gen_helper_gvec_2 * const cz_fns[] = { /* vector -> tile helpers, indexed by a->esz */
+ gen_helper_sme2_mova_cz_b, gen_helper_sme2_mova_cz_h,
+ gen_helper_sme2_mova_cz_s, gen_helper_sme2_mova_cz_d,
+ };
+ static gen_helper_gvec_2 * const zc_fns[] = { /* tile -> vector helpers, indexed by a->esz */
+ gen_helper_sme2_mova_zc_b, gen_helper_sme2_mova_zc_h,
+ gen_helper_sme2_mova_zc_s, gen_helper_sme2_mova_zc_d,
+ };
+ TCGv_ptr t_za;
+ int svl;
+
+ if (!sme_smza_enabled_check(s)) { /* presumably the check has already emitted the exception -- confirm */
+ return true; /* instruction is still reported as handled */
+ }
+
+ svl = streaming_vec_reg_size(s); /* presumably in bytes: used as the gvec operation size below -- confirm */
+ if (svl == 16 && n == 4 && a->esz == MO_64) { /* NOTE(review): presumably a 64-bit tile has fewer than 4 slices at this size -- confirm */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ if (a->v) { /* vertical slices: use the out-of-line helpers */
+ TCGv_i32 t_desc = tcg_constant_i32(simd_desc(svl, svl, 0)); /* oprsz == maxsz == svl, no extra data */
+
+ for (int i = 0; i < n; ++i) {
+ TCGv_ptr t_zr = vec_full_reg_ptr(s, a->zr * n + i); /* i-th register of the group starting at zr * n */
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za,
+ a->off * n + i, a->v); /* i-th slice of the group starting at off * n */
+ if (to_vec) {
+ zc_fns[a->esz](t_zr, t_za, t_desc); /* destination is the vector register */
+ } else {
+ cz_fns[a->esz](t_za, t_zr, t_desc); /* destination is the tile slice */
+ }
+ }
+ } else { /* horizontal slices: inline gvec move, no helper call */
+ for (int i = 0; i < n; ++i) {
+ int zr_ofs = vec_full_reg_offset(s, a->zr * n + i); /* same register numbering as the vertical path */
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za,
+ a->off * n + i, a->v); /* same slice numbering as the vertical path */
+ if (to_vec) {
+ tcg_gen_gvec_mov_var(MO_8, tcg_env, zr_ofs, t_za, 0, svl, svl); /* tile slice -> vector register, svl bytes */
+ } else {
+ tcg_gen_gvec_mov_var(MO_8, t_za, 0, tcg_env, zr_ofs, svl, svl); /* vector register -> tile slice, svl bytes */
+ }
+ }
+ }
+ return true;
+}
+
+TRANS_FEAT(MOVA_tz2, aa64_sme2, do_mova_tile_n, a, 2, false) /* 2 vector registers -> tile slices */
+TRANS_FEAT(MOVA_tz4, aa64_sme2, do_mova_tile_n, a, 4, false) /* 4 vector registers -> tile slices */
+TRANS_FEAT(MOVA_zt2, aa64_sme2, do_mova_tile_n, a, 2, true) /* tile slices -> 2 vector registers */
+TRANS_FEAT(MOVA_zt4, aa64_sme2, do_mova_tile_n, a, 4, true) /* tile slices -> 4 vector registers */
+
static bool do_movt(DisasContext *s, arg_MOVT_rzt *a,
void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
{
diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode
index 459b96805f..5eca5f4acf 100644
--- a/target/arm/tcg/sme.decode
+++ b/target/arm/tcg/sme.decode
@@ -28,6 +28,7 @@ ZERO_zt0 11000000 01 001 00000000000 00000001
%mova_rs 13:2 !function=plus_12
&mova_p esz rs pg zr za off v:bool
+&mova_t esz rs zr za off v:bool  # same fields as &mova_p, without pg
MOVA_tz 11000000 00 00000 0 v:1 .. pg:3 zr:5 0 off:4 \
&mova_p rs=%mova_rs esz=0 za=0
@@ -51,6 +52,42 @@ MOVA_zt 11000000 11 00001 0 v:1 .. pg:3 0 za:3 off:1 zr:5 \
MOVA_zt 11000000 11 00001 1 v:1 .. pg:3 0 za:4 zr:5 \
&mova_p rs=%mova_rs esz=4 off=0
+MOVA_tz2 11000000 00 00010 0 v:1 .. 000 zr:4 0 00 off:3 \
+ &mova_t rs=%mova_rs esz=0 za=0  # zr and off are multiplied by 2 in do_mova_tile_n
+MOVA_tz2 11000000 01 00010 0 v:1 .. 000 zr:4 0 00 za:1 off:2 \
+ &mova_t rs=%mova_rs esz=1
+MOVA_tz2 11000000 10 00010 0 v:1 .. 000 zr:4 0 00 za:2 off:1 \
+ &mova_t rs=%mova_rs esz=2
+MOVA_tz2 11000000 11 00010 0 v:1 .. 000 zr:4 0 00 za:3 \
+ &mova_t rs=%mova_rs esz=3 off=0  # no off bits in this encoding
+
+MOVA_zt2 11000000 00 00011 0 v:1 .. 000 00 off:3 zr:4 0 \
+ &mova_t rs=%mova_rs esz=0 za=0  # zr and off are multiplied by 2 in do_mova_tile_n
+MOVA_zt2 11000000 01 00011 0 v:1 .. 000 00 za:1 off:2 zr:4 0 \
+ &mova_t rs=%mova_rs esz=1
+MOVA_zt2 11000000 10 00011 0 v:1 .. 000 00 za:2 off:1 zr:4 0 \
+ &mova_t rs=%mova_rs esz=2
+MOVA_zt2 11000000 11 00011 0 v:1 .. 000 00 za:3 zr:4 0 \
+ &mova_t rs=%mova_rs esz=3 off=0  # no off bits in this encoding
+
+MOVA_tz4 11000000 00 00010 0 v:1 .. 001 zr:3 00 000 off:2 \
+ &mova_t rs=%mova_rs esz=0 za=0  # zr and off are multiplied by 4 in do_mova_tile_n
+MOVA_tz4 11000000 01 00010 0 v:1 .. 001 zr:3 00 000 za:1 off:1 \
+ &mova_t rs=%mova_rs esz=1
+MOVA_tz4 11000000 10 00010 0 v:1 .. 001 zr:3 00 000 za:2 \
+ &mova_t rs=%mova_rs esz=2 off=0  # no off bits in this encoding
+MOVA_tz4 11000000 11 00010 0 v:1 .. 001 zr:3 00 00 za:3 \
+ &mova_t rs=%mova_rs esz=3 off=0  # no off bits in this encoding
+
+MOVA_zt4 11000000 00 00011 0 v:1 .. 001 000 off:2 zr:3 00 \
+ &mova_t rs=%mova_rs esz=0 za=0  # zr and off are multiplied by 4 in do_mova_tile_n
+MOVA_zt4 11000000 01 00011 0 v:1 .. 001 000 za:1 off:1 zr:3 00 \
+ &mova_t rs=%mova_rs esz=1
+MOVA_zt4 11000000 10 00011 0 v:1 .. 001 000 za:2 zr:3 00 \
+ &mova_t rs=%mova_rs esz=2 off=0  # no off bits in this encoding
+MOVA_zt4 11000000 11 00011 0 v:1 .. 001 00 za:3 zr:3 00 \
+ &mova_t rs=%mova_rs esz=3 off=0  # no off bits in this encoding
+
### SME Move into/from ZT0
MOVT_rzt 1100 0000 0100 1100 0 off:3 00 11111 rt:5
--
2.43.0