Pass the env pointer through to the gvec_bfdot helper,
so we can use it to add support for FEAT_EBF16.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 4 ++--
target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++++++-
target/arm/tcg/translate-neon.c | 35 +++++++++++++++++++++++++++++++--
target/arm/tcg/translate-sve.c | 15 +++++++++++++-
target/arm/tcg/vec_helper.c | 3 ++-
5 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 970d059dec5..aece9fd4aa7 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1027,8 +1027,8 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 148be2826ec..4aef8b9211a 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -735,6 +735,22 @@ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
+/*
+ * Expand a 4-operand operation using an out-of-line helper that takes
+ * a pointer to the CPU env.
+ */
+static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, int ra, int data,
+ gen_helper_gvec_4_ptr *fn)
+{
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ tcg_env,
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
/*
* Expand a 4-operand + fpstatus pointer + simd data value operation using
* an out-of-line helper.
@@ -5601,10 +5617,19 @@ static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
return true;
}
+static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
+ }
+ return true;
+}
+
TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
-TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
+TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfmmla)
TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 915c9e56db5..454380f01d7 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -148,6 +148,37 @@ static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
return true;
}
+static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm,
+ int data, gen_helper_gvec_4_ptr *fn_gvec)
+{
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
+ return false;
+ }
+
+ /*
+ * UNDEF accesses to odd registers for each bit of Q.
+ * Q will be 0b111 for all Q-reg instructions, otherwise
+ * when we have mixed Q- and D-reg inputs.
+ */
+ if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ int opr_sz = q ? 16 : 8;
+ tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
+ vfp_reg_offset(1, vn),
+ vfp_reg_offset(1, vm),
+ vfp_reg_offset(1, vd),
+ tcg_env,
+ opr_sz, opr_sz, data, fn_gvec);
+ return true;
+}
+
static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
int data, ARMFPStatusFlavour fp_flavour,
gen_helper_gvec_4_ptr *fn_gvec_ptr)
@@ -266,8 +297,8 @@ static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
if (!dc_isar_feature(aa32_bf16, s)) {
return false;
}
- return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
- gen_helper_gvec_bfdot);
+ return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_bfdot);
}
static bool trans_VFML(DisasContext *s, arg_VFML *a)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 798ab2bfb13..4fb0bd077b4 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -238,6 +238,19 @@ static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
return ret;
}
+static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ int rd, int rn, int rm, int ra,
+ int data)
+{
+ return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env);
+}
+
+static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ arg_rrrr_esz *a, int data)
+{
+ return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
+}
+
/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
int rd, int rn, int rm, int ra, int pg,
@@ -7099,7 +7112,7 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
gen_helper_gvec_ummla_b, a, 0)
-TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
gen_helper_gvec_bfdot_idx, a)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 98604d170fd..37aad4be4b0 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2814,7 +2814,8 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
return t1;
}
-void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
+ void *envp, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
float32 *d = vd, *a = va;
--
2.34.1
On 7/31/24 02:03, Peter Maydell wrote:
> Pass the env pointer through to the gvec_bfdot helper,
> so we can use it to add support for FEAT_EBF16.
>
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
> target/arm/helper.h | 4 ++--
> target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++++++-
> target/arm/tcg/translate-neon.c | 35 +++++++++++++++++++++++++++++++--
> target/arm/tcg/translate-sve.c | 15 +++++++++++++-
> target/arm/tcg/vec_helper.c | 3 ++-
> 5 files changed, 77 insertions(+), 7 deletions(-)
>
> diff --git a/target/arm/helper.h b/target/arm/helper.h
> index 970d059dec5..aece9fd4aa7 100644
> --- a/target/arm/helper.h
> +++ b/target/arm/helper.h
> @@ -1027,8 +1027,8 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
> DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
> void, ptr, ptr, ptr, ptr, i32)
>
> -DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
> - void, ptr, ptr, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
> + void, ptr, ptr, ptr, ptr, ptr, i32)

Because env expands to TCGv_ptr in the translation context, I suspect that you can use
that here. Worth a try, anyway, so that

> -void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
> +void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
> + void *envp, uint32_t desc)

this doesn't have to use void *.

Either way,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
On Wed, 31 Jul 2024 at 02:36, Richard Henderson <richard.henderson@linaro.org> wrote:
>
> On 7/31/24 02:03, Peter Maydell wrote:
> > Pass the env pointer through to the gvec_bfdot helper,
> > so we can use it to add support for FEAT_EBF16.
> >
> > Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> > ---
> > target/arm/helper.h | 4 ++--
> > target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++++++-
> > target/arm/tcg/translate-neon.c | 35 +++++++++++++++++++++++++++++++--
> > target/arm/tcg/translate-sve.c | 15 +++++++++++++-
> > target/arm/tcg/vec_helper.c | 3 ++-
> > 5 files changed, 77 insertions(+), 7 deletions(-)
> >
> > diff --git a/target/arm/helper.h b/target/arm/helper.h
> > index 970d059dec5..aece9fd4aa7 100644
> > --- a/target/arm/helper.h
> > +++ b/target/arm/helper.h
> > @@ -1027,8 +1027,8 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
> > DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
> > void, ptr, ptr, ptr, ptr, i32)
> >
> > -DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
> > - void, ptr, ptr, ptr, ptr, i32)
> > +DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
> > + void, ptr, ptr, ptr, ptr, ptr, i32)
>
> Because env expands to TCGv_ptr in the translation context, I suspect that you can use
> that here. Worth a try, anyway, so that
>
> > -void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
> > +void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
> > + void *envp, uint32_t desc)
>
> this doesn't have to use void *.

I thought I'd tried that, but obviously I didn't hit on the
right combination of types in the prototype/definition.
This does work, so I've changed the patchset to use it.

thanks
-- PMM
© 2016 - 2024 Red Hat, Inc.