Currently we implement BFCVT scalar via do_fp1_scalar_ah(). This
works even though BFCVT is a narrowing operation from 32 to 16 bits,
because we can use write_fp_sreg() for float16. However, FPCR.NEP
support requires that we use write_fp_hreg_merging() for float16
outputs, so we can't continue to borrow the non-narrowing
do_fp1_scalar_ah() function for this. Split out trans_BFCVT_s()
into its own implementation that honours FPCR.NEP.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 66c214ed278..944bdf8cafe 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -8582,10 +8582,27 @@ static const FPScalar1 f_scalar_frintx = {
};
TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
-static const FPScalar1 f_scalar_bfcvt = {
- .gen_s = gen_helper_bfcvt,
-};
-TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1)
+static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
+{
+ ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64;
+ TCGv_i32 t32;
+ int check;
+
+ if (!dc_isar_feature(aa64_bf16, s)) {
+ return false;
+ }
+
+ check = fp_access_check_scalar_hsd(s, a->esz);
+
+ if (check <= 0) {
+ return check == 0;
+ }
+
+ t32 = read_fp_sreg(s, a->rn);
+ gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
+ write_fp_hreg_merging(s, a->rd, a->rd, t32);
+ return true;
+}
static const FPScalar1 f_scalar_frint32 = {
NULL,
--
2.34.1