FEAT_SVE_B16B16 adds bfloat16 versions of the FMLA and FMLS insns in
the SVE floating-point multiply-add (indexed) insn group. Implement
these.
Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max")
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/sve.decode | 2 ++
target/arm/tcg/translate-sve.c | 25 ++++++++++++++++---------
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index a76f2236f43..a77b725c876 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -1052,9 +1052,11 @@ FCMLA_zzxz 01100100 11 1 index:1 rm:4 0001 rot:2 rn:5 rd:5 \
### SVE FP Multiply-Add Indexed Group
# SVE floating-point multiply-add (indexed)
+FMLA_zzxz 01100100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=0
FMLA_zzxz 01100100 0. 1 ..... 000000 ..... ..... @rrxr_3 esz=1
FMLA_zzxz 01100100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2
FMLA_zzxz 01100100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3
+FMLS_zzxz 01100100 0. 1 ..... 000011 ..... ..... @rrxr_3 esz=0
FMLS_zzxz 01100100 0. 1 ..... 000001 ..... ..... @rrxr_3 esz=1
FMLS_zzxz 01100100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2
FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 37ecbc2b7c0..fc76624b5a1 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3883,24 +3883,31 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
*** SVE Floating Point Multiply-Add Indexed Group
*/
+static bool do_fmla_zzxz(DisasContext *s, arg_rrxr_esz *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ /* These insns use MO_8 to encode BFloat16 */
+ if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
+ return false;
+ }
+ return gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
- NULL, gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_bfmla_idx, gen_helper_gvec_fmla_idx_h,
gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
};
-TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
- fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+TRANS_FEAT(FMLA_zzxz, aa64_sve, do_fmla_zzxz, a, fmla_idx_fns[a->esz])
static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
- { NULL, NULL },
+ { gen_helper_gvec_bfmls_idx, gen_helper_gvec_ah_bfmls_idx },
{ gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
{ gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
{ gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
};
-TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
- fmls_idx_fns[a->esz][s->fpcr_ah],
- a->rd, a->rn, a->rm, a->ra, a->index,
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+TRANS_FEAT(FMLS_zzxz, aa64_sve, do_fmla_zzxz, a,
+ fmls_idx_fns[a->esz][s->fpcr_ah])
/*
*** SVE Floating Point Multiply Indexed Group
--
2.43.0