Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper.h | 9 +++++++++
target/arm/tcg/translate-sme.c | 14 ++++++++++++++
target/arm/tcg/vec_helper.c | 26 ++++++++++++++++++++++++++
target/arm/tcg/sme.decode | 18 ++++++++++++++++++
4 files changed, 67 insertions(+)
diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h
index b741470b51..37dd384659 100644
--- a/target/arm/tcg/helper.h
+++ b/target/arm/tcg/helper.h
@@ -799,14 +799,17 @@ DEF_HELPER_FLAGS_5(gvec_fmls_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i
DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_bfmla, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, fpst, i32)
@@ -838,6 +841,8 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_bfmla_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
@@ -845,6 +850,8 @@ DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_bfmls_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
@@ -852,6 +859,8 @@ DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_bfmls_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 5f51369609..3f9a80248a 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -1038,6 +1038,15 @@ TRANS_FEAT(FMLA_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA,
TRANS_FEAT(FMLS_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA,
s->fpcr_ah ? gen_helper_gvec_ah_vfms_d : gen_helper_gvec_vfms_d)
+TRANS_FEAT(BFMLA_n1, aa64_sme_b16b16, do_fmla, a, false, FPST_ZA,
+ gen_helper_gvec_bfmla)
+TRANS_FEAT(BFMLS_n1, aa64_sme_b16b16, do_fmla, a, false, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_bfmls : gen_helper_gvec_bfmls)
+TRANS_FEAT(BFMLA_nn, aa64_sme_b16b16, do_fmla, a, true, FPST_ZA,
+ gen_helper_gvec_bfmla)
+TRANS_FEAT(BFMLS_nn, aa64_sme_b16b16, do_fmla, a, true, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_bfmls : gen_helper_gvec_bfmls)
+
static bool do_fmla_nx(DisasContext *s, arg_azx_n *a,
ARMFPStatusFlavour fpst, gen_helper_gvec_4_ptr *fn)
{
@@ -1058,6 +1067,11 @@ TRANS_FEAT(FMLA_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA,
TRANS_FEAT(FMLS_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA,
s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_d : gen_helper_gvec_fmls_idx_d)
+TRANS_FEAT(BFMLA_nx, aa64_sme_b16b16, do_fmla_nx, a, FPST_ZA,
+ gen_helper_gvec_bfmla_idx)
+TRANS_FEAT(BFMLS_nx, aa64_sme_b16b16, do_fmla_nx, a, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_bfmls_idx : gen_helper_gvec_bfmls_idx)
+
/*
* Expand array multi-vector single (n1), array multi-vector (nn),
* and array multi-vector indexed (nx), for integer accumulate.
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index e03db47ef7..f4373dd4bf 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -1608,6 +1608,12 @@ static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2,
return float16_muladd(op1, op2, dest, 0, stat);
}
+static bfloat16 bfloat16_muladd_f(bfloat16 dest, bfloat16 op1, bfloat16 op2,
+ float_status *stat)
+{
+ return bfloat16_muladd(op1, op2, dest, 0, stat);
+}
+
static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
float_status *stat)
{
@@ -1626,6 +1632,12 @@ static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
return float16_muladd(float16_chs(op1), op2, dest, 0, stat);
}
+static bfloat16 bfloat16_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2,
+ float_status *stat)
+{
+ return bfloat16_muladd(bfloat16_chs(op1), op2, dest, 0, stat);
+}
+
static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
float_status *stat)
{
@@ -1644,6 +1656,12 @@ static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2,
return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
}
+static bfloat16 bfloat16_ah_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2,
+ float_status *stat)
+{
+ return bfloat16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
+}
+
static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2,
float_status *stat)
{
@@ -1677,14 +1695,19 @@ DO_MULADD(gvec_fmls_nf_s, float32_mulsub_nf, float32)
DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
DO_MULADD(gvec_vfma_d, float64_muladd_f, float64)
+DO_MULADD(gvec_bfmla, bfloat16_muladd_f, bfloat16)
DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
+DO_MULADD(gvec_bfmls, bfloat16_mulsub_f, bfloat16)
DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16)
DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32)
DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64)
+DO_MULADD(gvec_ah_bfmls, bfloat16_ah_mulsub_f, bfloat16)
+
+#undef DO_MULADD
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
* For AdvSIMD, there is of course only one such vector segment.
@@ -1803,14 +1826,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
+DO_FMLA_IDX(gvec_bfmla_idx, bfloat16, H2, 0, 0)
DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
+DO_FMLA_IDX(gvec_bfmls_idx, bfloat16, H2, INT16_MIN, 0)
DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
+DO_FMLA_IDX(gvec_ah_bfmls_idx, bfloat16, H2, 0, float_muladd_negate_product)
#undef DO_FMLA_IDX
diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode
index 962ffae3d6..b3dd3bffc3 100644
--- a/target/arm/tcg/sme.decode
+++ b/target/arm/tcg/sme.decode
@@ -370,16 +370,22 @@ USMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=4
SUMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=2
SUMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4
+BFMLA_n1 11000001 011 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2
FMLA_n1_h 11000001 001 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2
FMLA_n1_s 11000001 001 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2
FMLA_n1_d 11000001 011 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2
+
+BFMLA_n1 11000001 011 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4
FMLA_n1_h 11000001 001 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4
FMLA_n1_s 11000001 001 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4
FMLA_n1_d 11000001 011 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4
+BFMLS_n1 11000001 011 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2
FMLS_n1_h 11000001 001 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2
FMLS_n1_s 11000001 001 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2
FMLS_n1_d 11000001 011 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2
+
+BFMLS_n1 11000001 011 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4
FMLS_n1_h 11000001 001 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4
FMLS_n1_s 11000001 001 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4
FMLS_n1_d 11000001 011 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4
@@ -484,16 +490,22 @@ UMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4
USMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0010 . @azz_2x2_o1x4
USMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0010 . @azz_4x4_o1x4
+BFMLA_nn 11000001 111 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3
FMLA_nn_h 11000001 101 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3
FMLA_nn_s 11000001 101 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3
FMLA_nn_d 11000001 111 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3
+
+BFMLA_nn 11000001 111 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3
FMLA_nn_h 11000001 101 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3
FMLA_nn_s 11000001 101 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3
FMLA_nn_d 11000001 111 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3
+BFMLS_nn 11000001 111 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3
FMLS_nn_h 11000001 101 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3
FMLS_nn_s 11000001 101 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3
FMLS_nn_d 11000001 111 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3
+
+BFMLS_nn 11000001 111 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3
FMLS_nn_h 11000001 101 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3
FMLS_nn_s 11000001 101 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
FMLS_nn_d 11000001 111 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
@@ -652,16 +664,22 @@ SUMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 10 ... @azx_4x1_i4_o1
@azx_4x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \
&azx_n n=4 rv=%mova_rv zn=%zn_ax4 idx=%idx3_10_3
+BFMLA_nx 11000001 0001 .... 0 .. 1.. ....1 0 .... @azx_2x1_i3_o3
FMLA_nx_h 11000001 0001 .... 0 .. 1.. ....0 0 .... @azx_2x1_i3_o3
FMLA_nx_s 11000001 0101 .... 0 .. 0.. ....0 00 ... @azx_2x1_i2_o3
FMLA_nx_d 11000001 1101 .... 0 .. 00. ....0 00 ... @azx_2x1_i1_o3
+
+BFMLA_nx 11000001 0001 .... 1 .. 1.. ...01 0 .... @azx_4x1_i3_o3
FMLA_nx_h 11000001 0001 .... 1 .. 1.. ...00 0 .... @azx_4x1_i3_o3
FMLA_nx_s 11000001 0101 .... 1 .. 0.. ...00 00 ... @azx_4x1_i2_o3
FMLA_nx_d 11000001 1101 .... 1 .. 00. ...00 00 ... @azx_4x1_i1_o3
+BFMLS_nx 11000001 0001 .... 0 .. 1.. ....1 1 .... @azx_2x1_i3_o3
FMLS_nx_h 11000001 0001 .... 0 .. 1.. ....0 1 .... @azx_2x1_i3_o3
FMLS_nx_s 11000001 0101 .... 0 .. 0.. ....0 10 ... @azx_2x1_i2_o3
FMLS_nx_d 11000001 1101 .... 0 .. 00. ....0 10 ... @azx_2x1_i1_o3
+
+BFMLS_nx 11000001 0001 .... 1 .. 1.. ...01 1 .... @azx_4x1_i3_o3
FMLS_nx_h 11000001 0001 .... 1 .. 1.. ...00 1 .... @azx_4x1_i3_o3
FMLS_nx_s 11000001 0101 .... 1 .. 0.. ...00 10 ... @azx_4x1_i2_o3
FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3
--
2.43.0