target/sparc: floating-point cleanup

[PATCH 05/22] target/sparc: Inline FNEG, FABS

Posted by Richard Henderson 1 year ago

These are simple bit manipulation insns.
Begin using i128 for float128.
Implement FMOVq with do_qq.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/sparc/helper.h     |  6 ----
 target/sparc/fop_helper.c | 34 ---------------------
 target/sparc/translate.c  | 62 +++++++++++++++++++--------------------
 3 files changed, 30 insertions(+), 72 deletions(-)

diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 55eff66283..74a1575d21 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -37,7 +37,6 @@ DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32)
 #endif
 DEF_HELPER_FLAGS_1(check_ieee_exceptions, TCG_CALL_NO_WG, tl, env)
 DEF_HELPER_FLAGS_2(set_fsr, TCG_CALL_NO_RWG, void, env, tl)
-DEF_HELPER_FLAGS_1(fabss, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_RWG, f32, env, f32)
 DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_RWG, f64, env, f64)
 DEF_HELPER_FLAGS_3(fcmps, TCG_CALL_NO_WG, tl, env, f32, f32)
@@ -48,7 +47,6 @@ DEF_HELPER_FLAGS_1(fsqrtq, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_1(fcmpq, TCG_CALL_NO_WG, tl, env)
 DEF_HELPER_FLAGS_1(fcmpeq, TCG_CALL_NO_WG, tl, env)
 #ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_1(fabsd, TCG_CALL_NO_RWG_SE, f64, f64)
 DEF_HELPER_FLAGS_3(fcmps_fcc1, TCG_CALL_NO_WG, tl, env, f32, f32)
 DEF_HELPER_FLAGS_3(fcmps_fcc2, TCG_CALL_NO_WG, tl, env, f32, f32)
 DEF_HELPER_FLAGS_3(fcmps_fcc3, TCG_CALL_NO_WG, tl, env, f32, f32)
@@ -61,7 +59,6 @@ DEF_HELPER_FLAGS_3(fcmpes_fcc3, TCG_CALL_NO_WG, tl, env, f32, f32)
 DEF_HELPER_FLAGS_3(fcmped_fcc1, TCG_CALL_NO_WG, tl, env, f64, f64)
 DEF_HELPER_FLAGS_3(fcmped_fcc2, TCG_CALL_NO_WG, tl, env, f64, f64)
 DEF_HELPER_FLAGS_3(fcmped_fcc3, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_1(fabsq, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_1(fcmpq_fcc1, TCG_CALL_NO_WG, tl, env)
 DEF_HELPER_FLAGS_1(fcmpq_fcc2, TCG_CALL_NO_WG, tl, env)
 DEF_HELPER_FLAGS_1(fcmpq_fcc3, TCG_CALL_NO_WG, tl, env)
@@ -90,15 +87,12 @@ DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_RWG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_RWG, f64, env, f32, f32)
 DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_RWG, void, env, f64, f64)
 
-DEF_HELPER_FLAGS_1(fnegs, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_RWG_SE, f64, env, s32)
 DEF_HELPER_FLAGS_2(fitoq, TCG_CALL_NO_RWG, void, env, s32)
 
 DEF_HELPER_FLAGS_2(fitos, TCG_CALL_NO_RWG, f32, env, s32)
 
 #ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_1(fnegd, TCG_CALL_NO_RWG_SE, f64, f64)
-DEF_HELPER_FLAGS_1(fnegq, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(fxtos, TCG_CALL_NO_RWG, f32, env, s64)
 DEF_HELPER_FLAGS_2(fxtod, TCG_CALL_NO_RWG, f64, env, s64)
 DEF_HELPER_FLAGS_2(fxtoq, TCG_CALL_NO_RWG, void, env, s64)
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 0f8aa3abcd..d6fb769769 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -114,23 +114,6 @@ void helper_fdmulq(CPUSPARCState *env, float64 src1, float64 src2)
                        &env->fp_status);
 }
 
-float32 helper_fnegs(float32 src)
-{
-    return float32_chs(src);
-}
-
-#ifdef TARGET_SPARC64
-float64 helper_fnegd(float64 src)
-{
-    return float64_chs(src);
-}
-
-F_HELPER(neg, q)
-{
-    QT0 = float128_chs(QT1);
-}
-#endif
-
 /* Integer to float conversion.  */
 float32 helper_fitos(CPUSPARCState *env, int32_t src)
 {
@@ -229,23 +212,6 @@ int64_t helper_fqtox(CPUSPARCState *env)
 }
 #endif
 
-float32 helper_fabss(float32 src)
-{
-    return float32_abs(src);
-}
-
-#ifdef TARGET_SPARC64
-float64 helper_fabsd(float64 src)
-{
-    return float64_abs(src);
-}
-
-void helper_fabsq(CPUSPARCState *env)
-{
-    QT0 = float128_abs(QT1);
-}
-#endif
-
 float32 helper_fsqrts(CPUSPARCState *env, float32 src)
 {
     return float32_sqrt(src, &env->fp_status);
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 0e494d3ebd..254f185b83 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -43,9 +43,7 @@
 #else
 # define gen_helper_clear_softint(E, S)         qemu_build_not_reached()
 # define gen_helper_done(E)                     qemu_build_not_reached()
-# define gen_helper_fabsd(D, S)                 qemu_build_not_reached()
 # define gen_helper_flushw(E)                   qemu_build_not_reached()
-# define gen_helper_fnegd(D, S)                 qemu_build_not_reached()
 # define gen_helper_rdccr(D, E)                 qemu_build_not_reached()
 # define gen_helper_rdcwp(D, E)                 qemu_build_not_reached()
 # define gen_helper_restored(E)                 qemu_build_not_reached()
@@ -61,7 +59,6 @@
 # define gen_helper_write_softint(E, S)         qemu_build_not_reached()
 # define gen_helper_wrpil(E, S)                 qemu_build_not_reached()
 # define gen_helper_wrpstate(E, S)              qemu_build_not_reached()
-# define gen_helper_fabsq                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpeq16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpeq32             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpgt16             ({ qemu_build_not_reached(); NULL; })
@@ -79,7 +76,6 @@
 # define gen_helper_fmul8x16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fmuld8sux16          ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fmuld8ulx16          ({ qemu_build_not_reached(); NULL; })
-# define gen_helper_fnegq                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fpmerge              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fqtox                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fstox                ({ qemu_build_not_reached(); NULL; })
@@ -1239,13 +1235,13 @@ static void gen_op_fmovs(TCGv_i32 dst, TCGv_i32 src)
 static void gen_op_fnegs(TCGv_i32 dst, TCGv_i32 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fnegs(dst, src);
+    tcg_gen_xori_i32(dst, src, 1u << 31);
 }
 
 static void gen_op_fabss(TCGv_i32 dst, TCGv_i32 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fabss(dst, src);
+    tcg_gen_andi_i32(dst, src, ~(1u << 31));
 }
 
 static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
@@ -1257,13 +1253,33 @@ static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
 static void gen_op_fnegd(TCGv_i64 dst, TCGv_i64 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fnegd(dst, src);
+    tcg_gen_xori_i64(dst, src, 1ull << 63);
 }
 
 static void gen_op_fabsd(TCGv_i64 dst, TCGv_i64 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fabsd(dst, src);
+    tcg_gen_andi_i64(dst, src, ~(1ull << 63));
+}
+
+static void gen_op_fnegq(TCGv_i128 dst, TCGv_i128 src)
+{
+    TCGv_i64 l = tcg_temp_new_i64();
+    TCGv_i64 h = tcg_temp_new_i64();
+
+    tcg_gen_extr_i128_i64(l, h, src);
+    tcg_gen_xori_i64(h, h, 1ull << 63);
+    tcg_gen_concat_i64_i128(dst, l, h);
+}
+
+static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
+{
+    TCGv_i64 l = tcg_temp_new_i64();
+    TCGv_i64 h = tcg_temp_new_i64();
+
+    tcg_gen_extr_i128_i64(l, h, src);
+    tcg_gen_andi_i64(h, h, ~(1ull << 63));
+    tcg_gen_concat_i64_i128(dst, l, h);
 }
 
 #ifdef TARGET_SPARC64
@@ -4629,13 +4645,11 @@ TRANS(FiTOd, ALL, do_env_df, a, gen_helper_fitod)
 TRANS(FsTOd, ALL, do_env_df, a, gen_helper_fstod)
 TRANS(FsTOx, 64, do_env_df, a, gen_helper_fstox)
 
-static bool trans_FMOVq(DisasContext *dc, arg_FMOVq *a)
+static bool do_qq(DisasContext *dc, arg_r_r *a,
+                  void (*func)(TCGv_i128, TCGv_i128))
 {
     TCGv_i128 t;
 
-    if (!avail_64(dc)) {
-        return false;
-    }
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
@@ -4645,30 +4659,14 @@ static bool trans_FMOVq(DisasContext *dc, arg_FMOVq *a)
 
     gen_op_clear_ieee_excp_and_FTT();
     t = gen_load_fpr_Q(dc, a->rs);
+    func(t, t);
     gen_store_fpr_Q(dc, a->rd, t);
     return advance_pc(dc);
 }
 
-static bool do_qq(DisasContext *dc, arg_r_r *a,
-                  void (*func)(TCGv_env))
-{
-    if (gen_trap_ifnofpu(dc)) {
-        return true;
-    }
-    if (gen_trap_float128(dc)) {
-        return true;
-    }
-
-    gen_op_clear_ieee_excp_and_FTT();
-    gen_op_load_fpr_QT1(QFPREG(a->rs));
-    func(tcg_env);
-    gen_op_store_QT0_fpr(QFPREG(a->rd));
-    gen_update_fprs_dirty(dc, QFPREG(a->rd));
-    return advance_pc(dc);
-}
-
-TRANS(FNEGq, 64, do_qq, a, gen_helper_fnegq)
-TRANS(FABSq, 64, do_qq, a, gen_helper_fabsq)
+TRANS(FMOVq, 64, do_qq, a, tcg_gen_mov_i128)
+TRANS(FNEGq, 64, do_qq, a, gen_op_fnegq)
+TRANS(FABSq, 64, do_qq, a, gen_op_fabsq)
 
 static bool do_env_qq(DisasContext *dc, arg_r_r *a,
                        void (*func)(TCGv_env))
-- 
2.34.1

Re: [PATCH 05/22] target/sparc: Inline FNEG, FABS

Posted by Philippe Mathieu-Daudé 10 months ago

Hi Richard,

On 3/11/23 18:38, Richard Henderson wrote:
> These are simple bit manipulation insns.
> Begin using i128 for float128.
> Implement FMOVq with do_qq.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/sparc/helper.h     |  6 ----
>   target/sparc/fop_helper.c | 34 ---------------------
>   target/sparc/translate.c  | 62 +++++++++++++++++++--------------------
>   3 files changed, 30 insertions(+), 72 deletions(-)


> @@ -1239,13 +1235,13 @@ static void gen_op_fmovs(TCGv_i32 dst, TCGv_i32 src)
>   static void gen_op_fnegs(TCGv_i32 dst, TCGv_i32 src)
>   {
>       gen_op_clear_ieee_excp_and_FTT();
> -    gen_helper_fnegs(dst, src);
> +    tcg_gen_xori_i32(dst, src, 1u << 31);
>   }
>   
>   static void gen_op_fabss(TCGv_i32 dst, TCGv_i32 src)
>   {
>       gen_op_clear_ieee_excp_and_FTT();
> -    gen_helper_fabss(dst, src);
> +    tcg_gen_andi_i32(dst, src, ~(1u << 31));
>   }
>   
>   static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
> @@ -1257,13 +1253,33 @@ static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
>   static void gen_op_fnegd(TCGv_i64 dst, TCGv_i64 src)
>   {
>       gen_op_clear_ieee_excp_and_FTT();
> -    gen_helper_fnegd(dst, src);
> +    tcg_gen_xori_i64(dst, src, 1ull << 63);
>   }
>   
>   static void gen_op_fabsd(TCGv_i64 dst, TCGv_i64 src)
>   {
>       gen_op_clear_ieee_excp_and_FTT();
> -    gen_helper_fabsd(dst, src);
> +    tcg_gen_andi_i64(dst, src, ~(1ull << 63));
> +}
> +
> +static void gen_op_fnegq(TCGv_i128 dst, TCGv_i128 src)
> +{
> +    TCGv_i64 l = tcg_temp_new_i64();
> +    TCGv_i64 h = tcg_temp_new_i64();
> +
> +    tcg_gen_extr_i128_i64(l, h, src);
> +    tcg_gen_xori_i64(h, h, 1ull << 63);
> +    tcg_gen_concat_i64_i128(dst, l, h);
> +}
> +
> +static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
> +{
> +    TCGv_i64 l = tcg_temp_new_i64();
> +    TCGv_i64 h = tcg_temp_new_i64();
> +
> +    tcg_gen_extr_i128_i64(l, h, src);
> +    tcg_gen_andi_i64(h, h, ~(1ull << 63));
> +    tcg_gen_concat_i64_i128(dst, l, h);
>   }

Why not extract these as generic TCG FPU helpers?

$ git grep -wE 'float...?_(chs|abs)' target/
target/arm/tcg/helper-a64.c:214:    a = float16_chs(a);
target/arm/tcg/helper-a64.c:229:    a = float32_chs(a);
target/arm/tcg/helper-a64.c:244:    a = float64_chs(a);
target/arm/tcg/helper-a64.c:259:    a = float16_chs(a);
target/arm/tcg/helper-a64.c:274:    a = float32_chs(a);
target/arm/tcg/helper-a64.c:289:    a = float64_chs(a);
target/arm/tcg/helper-a64.c:632:    float16 f0 = float16_abs(a);
target/arm/tcg/helper-a64.c:633:    float16 f1 = float16_abs(b);
target/arm/tcg/helper-a64.c:642:    float16 f0 = float16_abs(a);
target/arm/tcg/helper-a64.c:643:    float16 f1 = float16_abs(b);
target/arm/tcg/mve_helper.c:2840:    return float16_abs(float16_sub(a, 
b, s));
target/arm/tcg/mve_helper.c:2845:    return float32_abs(float32_sub(a, 
b, s));
target/arm/tcg/mve_helper.c:2854:    return 
float16_maxnum(float16_abs(a), float16_abs(b), s);
target/arm/tcg/mve_helper.c:2859:    return 
float32_maxnum(float32_abs(a), float32_abs(b), s);
target/arm/tcg/mve_helper.c:2864:    return 
float16_minnum(float16_abs(a), float16_abs(b), s);
target/arm/tcg/mve_helper.c:2869:    return 
float32_minnum(float32_abs(a), float32_abs(b), s);
target/arm/tcg/neon_helper.c:1513:    float32 f0 = 
float32_abs(make_float32(a));
target/arm/tcg/neon_helper.c:1514:    float32 f1 = 
float32_abs(make_float32(b));
target/arm/tcg/neon_helper.c:1521:    float32 f0 = 
float32_abs(make_float32(a));
target/arm/tcg/neon_helper.c:1522:    float32 f1 = 
float32_abs(make_float32(b));
target/arm/tcg/neon_helper.c:1529:    float64 f0 = 
float64_abs(make_float64(a));
target/arm/tcg/neon_helper.c:1530:    float64 f1 = 
float64_abs(make_float64(b));
target/arm/tcg/neon_helper.c:1537:    float64 f0 = 
float64_abs(make_float64(a));
target/arm/tcg/neon_helper.c:1538:    float64 f1 = 
float64_abs(make_float64(b));
target/arm/tcg/sve_helper.c:4227:DO_REDUCE(sve_fmaxv_h, float16, H1_2, 
max, float16_chs(float16_infinity))
target/arm/tcg/sve_helper.c:4228:DO_REDUCE(sve_fmaxv_s, float32, H1_4, 
max, float32_chs(float32_infinity))
target/arm/tcg/sve_helper.c:4229:DO_REDUCE(sve_fmaxv_d, float64, H1_8, 
max, float64_chs(float64_infinity))
target/arm/tcg/sve_helper.c:4345:    return float16_abs(float16_sub(a, 
b, s));
target/arm/tcg/sve_helper.c:4350:    return float32_abs(float32_sub(a, 
b, s));
target/arm/tcg/sve_helper.c:4355:    return float64_abs(float64_sub(a, 
b, s));
target/arm/tcg/sve_helper.c:4997:            mm = float16_abs(mm);
target/arm/tcg/sve_helper.c:5019:            mm = float32_abs(mm);
target/arm/tcg/sve_helper.c:5045:            mm = float64_abs(mm);
target/arm/tcg/sve_helper.c:5062:    float16 neg_real = 
float16_chs(neg_imag);
target/arm/tcg/sve_helper.c:5094:    float32 neg_real = 
float32_chs(neg_imag);
target/arm/tcg/sve_helper.c:5126:    float64 neg_real = 
float64_chs(neg_imag);
target/arm/tcg/vec_helper.c:996:    return -float16_le(float16_abs(op2), 
float16_abs(op1), stat);
target/arm/tcg/vec_helper.c:1001:    return 
-float32_le(float32_abs(op2), float32_abs(op1), stat);
target/arm/tcg/vec_helper.c:1006:    return 
-float16_lt(float16_abs(op2), float16_abs(op1), stat);
target/arm/tcg/vec_helper.c:1011:    return 
-float32_lt(float32_abs(op2), float32_abs(op1), stat);
target/arm/tcg/vec_helper.c:1124:    return float16_abs(float16_sub(op1, 
op2, stat));
target/arm/tcg/vec_helper.c:1129:    return float32_abs(float32_sub(op1, 
op2, stat));
target/arm/tcg/vec_helper.c:1304:    return 
float16_muladd(float16_chs(op1), op2, dest, 0, stat);
target/arm/tcg/vec_helper.c:1310:    return 
float32_muladd(float32_chs(op1), op2, dest, 0, stat);
target/arm/vfp_helper.c:286:    return float16_chs(a);
target/arm/vfp_helper.c:291:    return float32_chs(a);
target/arm/vfp_helper.c:296:    return float64_chs(a);
target/arm/vfp_helper.c:301:    return float16_abs(a);
target/arm/vfp_helper.c:306:    return float32_abs(a);
target/arm/vfp_helper.c:311:    return float64_abs(a);
target/arm/vfp_helper.c:688:    } else if (float16_abs(f16) < (1 << 8)) {
target/arm/vfp_helper.c:738:    } else if (float32_abs(f32) < (1ULL << 
21)) {
target/arm/vfp_helper.c:1133:    if (value == float64_chs(float64_zero)) {
target/i386/tcg/fpu_helper.c:591:    ST0 = floatx80_chs(ST0);
target/i386/tcg/fpu_helper.c:596:    ST0 = floatx80_abs(ST0);
target/i386/tcg/fpu_helper.c:781:        tmp = floatx80_chs(tmp);
target/i386/tcg/fpu_helper.c:1739:        ST0 = 
floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
target/i386/tcg/fpu_helper.c:2104:            ST1 = floatx80_chs(ST1);
target/i386/tcg/fpu_helper.c:2119:            ST1 = floatx80_chs(ST0);
target/i386/tcg/fpu_helper.c:2135:            ST1 = floatx80_chs(ST1);
target/i386/tcg/fpu_helper.c:2140:            ST1 = 
floatx80_chs(floatx80_zero);
target/i386/tcg/fpu_helper.c:2276: 
floatx80_chs(floatx80_zero) :
target/i386/tcg/fpu_helper.c:2285: 
floatx80_chs(floatx80_infinity) :
target/m68k/fpu_helper.c:212:    res->d = 
floatx80_round(floatx80_abs(val->d), &env->fp_status);
target/m68k/fpu_helper.c:218:    res->d = 
floatx80_round(floatx80_abs(val->d), &env->fp_status);
target/m68k/fpu_helper.c:225:    res->d = 
floatx80_round(floatx80_abs(val->d), &env->fp_status);
target/m68k/fpu_helper.c:231:    res->d = 
floatx80_round(floatx80_chs(val->d), &env->fp_status);
target/m68k/fpu_helper.c:237:    res->d = 
floatx80_round(floatx80_chs(val->d), &env->fp_status);
target/m68k/fpu_helper.c:244:    res->d = 
floatx80_round(floatx80_chs(val->d), &env->fp_status);
target/m68k/fpu_helper.c:557:        quotient = 
floatx80_to_int32(floatx80_abs(fp_quot.d), &env->fp_status);
target/m68k/softfloat.c:2714:            fp0 = floatx80_abs(a); /* Y = 
|X| */
target/m68k/softfloat.c:2734:        fp0 = floatx80_abs(a); /* Y = |X| */
target/mips/tcg/fpu_helper.c:977:   return float64_abs(fdt0);
target/mips/tcg/fpu_helper.c:982:    return float32_abs(fst0);
target/mips/tcg/fpu_helper.c:990:    wt0 = float32_abs(fdt0 & 0XFFFFFFFF);
target/mips/tcg/fpu_helper.c:991:    wth0 = float32_abs(fdt0 >> 32);
target/mips/tcg/fpu_helper.c:997:   return float64_chs(fdt0);
target/mips/tcg/fpu_helper.c:1002:    return float32_chs(fst0);
target/mips/tcg/fpu_helper.c:1010:    wt0 = float32_chs(fdt0 & 0XFFFFFFFF);
target/mips/tcg/fpu_helper.c:1011:    wth0 = float32_chs(fdt0 >> 32);
target/mips/tcg/fpu_helper.c:1365:    fdt2 = 
float64_chs(float64_sub(fdt2, float64_one,
target/mips/tcg/fpu_helper.c:1374:    fst2 = 
float32_chs(float32_sub(fst2, float32_one,
target/mips/tcg/fpu_helper.c:1389:    fstl2 = 
float32_chs(float32_sub(fstl2, float32_one,
target/mips/tcg/fpu_helper.c:1391:    fsth2 = 
float32_chs(float32_sub(fsth2, float32_one,
target/mips/tcg/fpu_helper.c:1401:    fdt2 = 
float64_chs(float64_div(fdt2, FLOAT_TWO64,
target/mips/tcg/fpu_helper.c:1411:    fst2 = 
float32_chs(float32_div(fst2, FLOAT_TWO32,
target/mips/tcg/fpu_helper.c:1428:    fstl2 = 
float32_chs(float32_div(fstl2, FLOAT_TWO32,
target/mips/tcg/fpu_helper.c:1430:    fsth2 = 
float32_chs(float32_div(fsth2, FLOAT_TWO32,
target/mips/tcg/fpu_helper.c:1633:    fst0 = float64_chs(fst0);
target/mips/tcg/fpu_helper.c:1644:    fst0 = float32_chs(fst0);
target/mips/tcg/fpu_helper.c:1662:    fstl0 = float32_chs(fstl0);
target/mips/tcg/fpu_helper.c:1665:    fsth0 = float32_chs(fsth0);
target/mips/tcg/fpu_helper.c:1676:    fst0 = float64_chs(fst0);
target/mips/tcg/fpu_helper.c:1687:    fst0 = float32_chs(fst0);
target/mips/tcg/fpu_helper.c:1705:    fstl0 = float32_chs(fstl0);
target/mips/tcg/fpu_helper.c:1708:    fsth0 = float32_chs(fsth0);
target/mips/tcg/fpu_helper.c:1781:    fdt0 = float64_abs(fdt0); 
                         \
target/mips/tcg/fpu_helper.c:1782:    fdt1 = float64_abs(fdt1); 
                         \
target/mips/tcg/fpu_helper.c:1860:    fst0 = float32_abs(fst0); 
                         \
target/mips/tcg/fpu_helper.c:1861:    fst1 = float32_abs(fst1); 
                         \
target/mips/tcg/fpu_helper.c:1950:    fst0 = float32_abs(fdt0 & 
0XFFFFFFFF);                      \
target/mips/tcg/fpu_helper.c:1951:    fsth0 = float32_abs(fdt0 >> 32); 
                          \
target/mips/tcg/fpu_helper.c:1952:    fst1 = float32_abs(fdt1 & 
0XFFFFFFFF);                      \
target/mips/tcg/fpu_helper.c:1953:    fsth1 = float32_abs(fdt1 >> 32); 
                          \
target/ppc/fpu_helper.c:44:        return float32_chs(a);
target/ppc/int_helper.c:694:            float32 bneg = 
float32_chs(b->f32[i]);
target/s390x/tcg/vec_fpu_helper.c:922:                a = float32_abs(a);
target/s390x/tcg/vec_fpu_helper.c:923:                b = float32_abs(b);
target/s390x/tcg/vec_fpu_helper.c:984:                a = float64_abs(a);
target/s390x/tcg/vec_fpu_helper.c:985:                b = float64_abs(b);
target/s390x/tcg/vec_fpu_helper.c:1042:            a = float128_abs(a);
target/s390x/tcg/vec_fpu_helper.c:1043:            b = float128_abs(b);
target/sparc/fop_helper.c:119:    return float32_chs(src);
target/sparc/fop_helper.c:125:    return float64_chs(src);
target/sparc/fop_helper.c:130:    QT0 = float128_chs(QT1);
target/sparc/fop_helper.c:234:    return float32_abs(src);
target/sparc/fop_helper.c:240:    return float64_abs(src);
target/sparc/fop_helper.c:245:    QT0 = float128_abs(QT1);
target/xtensa/fpu_helper.c:126:    return float64_abs(v);
target/xtensa/fpu_helper.c:131:    return float32_abs(v);
target/xtensa/fpu_helper.c:136:    return float64_chs(v);
target/xtensa/fpu_helper.c:141:    return float32_chs(v);

Re: [PATCH 05/22] target/sparc: Inline FNEG, FABS

Posted by Richard Henderson 9 months, 4 weeks ago

On 1/30/24 18:40, Philippe Mathieu-Daudé wrote:
> Hi Richard,
> 
> On 3/11/23 18:38, Richard Henderson wrote:
>> These are simple bit manipulation insns.
>> Begin using i128 for float128.
>> Implement FMOVq with do_qq.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   target/sparc/helper.h     |  6 ----
>>   target/sparc/fop_helper.c | 34 ---------------------
>>   target/sparc/translate.c  | 62 +++++++++++++++++++--------------------
>>   3 files changed, 30 insertions(+), 72 deletions(-)
> 
> 
>> @@ -1239,13 +1235,13 @@ static void gen_op_fmovs(TCGv_i32 dst, TCGv_i32 src)
>>   static void gen_op_fnegs(TCGv_i32 dst, TCGv_i32 src)
>>   {
>>       gen_op_clear_ieee_excp_and_FTT();
>> -    gen_helper_fnegs(dst, src);
>> +    tcg_gen_xori_i32(dst, src, 1u << 31);
>>   }
>>   static void gen_op_fabss(TCGv_i32 dst, TCGv_i32 src)
>>   {
>>       gen_op_clear_ieee_excp_and_FTT();
>> -    gen_helper_fabss(dst, src);
>> +    tcg_gen_andi_i32(dst, src, ~(1u << 31));
>>   }
>>   static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
>> @@ -1257,13 +1253,33 @@ static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
>>   static void gen_op_fnegd(TCGv_i64 dst, TCGv_i64 src)
>>   {
>>       gen_op_clear_ieee_excp_and_FTT();
>> -    gen_helper_fnegd(dst, src);
>> +    tcg_gen_xori_i64(dst, src, 1ull << 63);
>>   }
>>   static void gen_op_fabsd(TCGv_i64 dst, TCGv_i64 src)
>>   {
>>       gen_op_clear_ieee_excp_and_FTT();
>> -    gen_helper_fabsd(dst, src);
>> +    tcg_gen_andi_i64(dst, src, ~(1ull << 63));
>> +}
>> +
>> +static void gen_op_fnegq(TCGv_i128 dst, TCGv_i128 src)
>> +{
>> +    TCGv_i64 l = tcg_temp_new_i64();
>> +    TCGv_i64 h = tcg_temp_new_i64();
>> +
>> +    tcg_gen_extr_i128_i64(l, h, src);
>> +    tcg_gen_xori_i64(h, h, 1ull << 63);
>> +    tcg_gen_concat_i64_i128(dst, l, h);
>> +}
>> +
>> +static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
>> +{
>> +    TCGv_i64 l = tcg_temp_new_i64();
>> +    TCGv_i64 h = tcg_temp_new_i64();
>> +
>> +    tcg_gen_extr_i128_i64(l, h, src);
>> +    tcg_gen_andi_i64(h, h, ~(1ull << 63));
>> +    tcg_gen_concat_i64_i128(dst, l, h);
>>   }
> 
> Why not extract these as generic TCG FPU helpers?

The representation of floating-point registers varies wildly between targets.  Sparc would 
be the only one to (a) have float128 and (b) represent them in TCGv_i128.

Even considering float32, is the representation TCGv_i32 or TCGv_i64?
Should the result be nan-boxed (riscv and loongarch)?

We already provide tcg_gen_xori_i{32,64}, so really, that's enough for any target.

> $ git grep -wE 'float...?_(chs|abs)' target/
> target/arm/tcg/helper-a64.c:214:    a = float16_chs(a);
> target/arm/tcg/helper-a64.c:229:    a = float32_chs(a);
> target/arm/tcg/helper-a64.c:244:    a = float64_chs(a);
> target/arm/tcg/helper-a64.c:259:    a = float16_chs(a);
> target/arm/tcg/helper-a64.c:274:    a = float32_chs(a);
> target/arm/tcg/helper-a64.c:289:    a = float64_chs(a);
> target/arm/tcg/helper-a64.c:632:    float16 f0 = float16_abs(a);
> target/arm/tcg/helper-a64.c:633:    float16 f1 = float16_abs(b);
> target/arm/tcg/helper-a64.c:642:    float16 f0 = float16_abs(a);
> target/arm/tcg/helper-a64.c:643:    float16 f1 = float16_abs(b);
> target/arm/tcg/mve_helper.c:2840:    return float16_abs(float16_sub(a, b, s));
> target/arm/tcg/mve_helper.c:2845:    return float32_abs(float32_sub(a, b, s));
> target/arm/tcg/mve_helper.c:2854:    return float16_maxnum(float16_abs(a), float16_abs(b), 
> s);
> target/arm/tcg/mve_helper.c:2859:    return float32_maxnum(float32_abs(a), float32_abs(b), 
> s);
> target/arm/tcg/mve_helper.c:2864:    return float16_minnum(float16_abs(a), float16_abs(b), 
> s);
> target/arm/tcg/mve_helper.c:2869:    return float32_minnum(float32_abs(a), float32_abs(b), 
> s);
> target/arm/tcg/neon_helper.c:1513:    float32 f0 = float32_abs(make_float32(a));
> target/arm/tcg/neon_helper.c:1514:    float32 f1 = float32_abs(make_float32(b));
> target/arm/tcg/neon_helper.c:1521:    float32 f0 = float32_abs(make_float32(a));
> target/arm/tcg/neon_helper.c:1522:    float32 f1 = float32_abs(make_float32(b));
> target/arm/tcg/neon_helper.c:1529:    float64 f0 = float64_abs(make_float64(a));
> target/arm/tcg/neon_helper.c:1530:    float64 f1 = float64_abs(make_float64(b));
> target/arm/tcg/neon_helper.c:1537:    float64 f0 = float64_abs(make_float64(a));
> target/arm/tcg/neon_helper.c:1538:    float64 f1 = float64_abs(make_float64(b));
> target/arm/tcg/sve_helper.c:4227:DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, 
> float16_chs(float16_infinity))
> target/arm/tcg/sve_helper.c:4228:DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, 
> float32_chs(float32_infinity))
> target/arm/tcg/sve_helper.c:4229:DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, 
> float64_chs(float64_infinity))
> target/arm/tcg/sve_helper.c:4345:    return float16_abs(float16_sub(a, b, s));
> target/arm/tcg/sve_helper.c:4350:    return float32_abs(float32_sub(a, b, s));
> target/arm/tcg/sve_helper.c:4355:    return float64_abs(float64_sub(a, b, s));
> target/arm/tcg/sve_helper.c:4997:            mm = float16_abs(mm);
> target/arm/tcg/sve_helper.c:5019:            mm = float32_abs(mm);
> target/arm/tcg/sve_helper.c:5045:            mm = float64_abs(mm);
> target/arm/tcg/sve_helper.c:5062:    float16 neg_real = float16_chs(neg_imag);
> target/arm/tcg/sve_helper.c:5094:    float32 neg_real = float32_chs(neg_imag);
> target/arm/tcg/sve_helper.c:5126:    float64 neg_real = float64_chs(neg_imag);
> target/arm/tcg/vec_helper.c:996:    return -float16_le(float16_abs(op2), float16_abs(op1), 
> stat);
> target/arm/tcg/vec_helper.c:1001:    return -float32_le(float32_abs(op2), 
> float32_abs(op1), stat);
> target/arm/tcg/vec_helper.c:1006:    return -float16_lt(float16_abs(op2), 
> float16_abs(op1), stat);
> target/arm/tcg/vec_helper.c:1011:    return -float32_lt(float32_abs(op2), 
> float32_abs(op1), stat);
> target/arm/tcg/vec_helper.c:1124:    return float16_abs(float16_sub(op1, op2, stat));
> target/arm/tcg/vec_helper.c:1129:    return float32_abs(float32_sub(op1, op2, stat));
> target/arm/tcg/vec_helper.c:1304:    return float16_muladd(float16_chs(op1), op2, dest, 0, 
> stat);
> target/arm/tcg/vec_helper.c:1310:    return float32_muladd(float32_chs(op1), op2, dest, 0, 
> stat);
> target/arm/vfp_helper.c:286:    return float16_chs(a);
> target/arm/vfp_helper.c:291:    return float32_chs(a);
> target/arm/vfp_helper.c:296:    return float64_chs(a);
> target/arm/vfp_helper.c:301:    return float16_abs(a);
> target/arm/vfp_helper.c:306:    return float32_abs(a);
> target/arm/vfp_helper.c:311:    return float64_abs(a);
> target/arm/vfp_helper.c:688:    } else if (float16_abs(f16) < (1 << 8)) {
> target/arm/vfp_helper.c:738:    } else if (float32_abs(f32) < (1ULL << 21)) {
> target/arm/vfp_helper.c:1133:    if (value == float64_chs(float64_zero)) {
> target/i386/tcg/fpu_helper.c:591:    ST0 = floatx80_chs(ST0);
> target/i386/tcg/fpu_helper.c:596:    ST0 = floatx80_abs(ST0);
> target/i386/tcg/fpu_helper.c:781:        tmp = floatx80_chs(tmp);
> target/i386/tcg/fpu_helper.c:1739:        ST0 = floatx80_div(floatx80_chs(floatx80_one), 
> floatx80_zero,
> target/i386/tcg/fpu_helper.c:2104:            ST1 = floatx80_chs(ST1);
> target/i386/tcg/fpu_helper.c:2119:            ST1 = floatx80_chs(ST0);
> target/i386/tcg/fpu_helper.c:2135:            ST1 = floatx80_chs(ST1);
> target/i386/tcg/fpu_helper.c:2140:            ST1 = floatx80_chs(floatx80_zero);
> target/i386/tcg/fpu_helper.c:2276: floatx80_chs(floatx80_zero) :
> target/i386/tcg/fpu_helper.c:2285: floatx80_chs(floatx80_infinity) :
> target/m68k/fpu_helper.c:212:    res->d = floatx80_round(floatx80_abs(val->d), 
> &env->fp_status);
> target/m68k/fpu_helper.c:218:    res->d = floatx80_round(floatx80_abs(val->d), 
> &env->fp_status);
> target/m68k/fpu_helper.c:225:    res->d = floatx80_round(floatx80_abs(val->d), 
> &env->fp_status);
> target/m68k/fpu_helper.c:231:    res->d = floatx80_round(floatx80_chs(val->d), 
> &env->fp_status);
> target/m68k/fpu_helper.c:237:    res->d = floatx80_round(floatx80_chs(val->d), 
> &env->fp_status);
> target/m68k/fpu_helper.c:244:    res->d = floatx80_round(floatx80_chs(val->d), 
> &env->fp_status);
> target/m68k/fpu_helper.c:557:        quotient = floatx80_to_int32(floatx80_abs(fp_quot.d), 
> &env->fp_status);
> target/m68k/softfloat.c:2714:            fp0 = floatx80_abs(a); /* Y = |X| */
> target/m68k/softfloat.c:2734:        fp0 = floatx80_abs(a); /* Y = |X| */
> target/mips/tcg/fpu_helper.c:977:   return float64_abs(fdt0);
> target/mips/tcg/fpu_helper.c:982:    return float32_abs(fst0);
> target/mips/tcg/fpu_helper.c:990:    wt0 = float32_abs(fdt0 & 0XFFFFFFFF);
> target/mips/tcg/fpu_helper.c:991:    wth0 = float32_abs(fdt0 >> 32);
> target/mips/tcg/fpu_helper.c:997:   return float64_chs(fdt0);
> target/mips/tcg/fpu_helper.c:1002:    return float32_chs(fst0);
> target/mips/tcg/fpu_helper.c:1010:    wt0 = float32_chs(fdt0 & 0XFFFFFFFF);
> target/mips/tcg/fpu_helper.c:1011:    wth0 = float32_chs(fdt0 >> 32);
> target/mips/tcg/fpu_helper.c:1365:    fdt2 = float64_chs(float64_sub(fdt2, float64_one,
> target/mips/tcg/fpu_helper.c:1374:    fst2 = float32_chs(float32_sub(fst2, float32_one,
> target/mips/tcg/fpu_helper.c:1389:    fstl2 = float32_chs(float32_sub(fstl2, float32_one,
> target/mips/tcg/fpu_helper.c:1391:    fsth2 = float32_chs(float32_sub(fsth2, float32_one,
> target/mips/tcg/fpu_helper.c:1401:    fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64,
> target/mips/tcg/fpu_helper.c:1411:    fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32,
> target/mips/tcg/fpu_helper.c:1428:    fstl2 = float32_chs(float32_div(fstl2, FLOAT_TWO32,
> target/mips/tcg/fpu_helper.c:1430:    fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32,
> target/mips/tcg/fpu_helper.c:1633:    fst0 = float64_chs(fst0);
> target/mips/tcg/fpu_helper.c:1644:    fst0 = float32_chs(fst0);
> target/mips/tcg/fpu_helper.c:1662:    fstl0 = float32_chs(fstl0);
> target/mips/tcg/fpu_helper.c:1665:    fsth0 = float32_chs(fsth0);
> target/mips/tcg/fpu_helper.c:1676:    fst0 = float64_chs(fst0);
> target/mips/tcg/fpu_helper.c:1687:    fst0 = float32_chs(fst0);
> target/mips/tcg/fpu_helper.c:1705:    fstl0 = float32_chs(fstl0);
> target/mips/tcg/fpu_helper.c:1708:    fsth0 = float32_chs(fsth0);
> target/mips/tcg/fpu_helper.c:1781:    fdt0 = float64_abs(fdt0);                         \
> target/mips/tcg/fpu_helper.c:1782:    fdt1 = float64_abs(fdt1);                         \
> target/mips/tcg/fpu_helper.c:1860:    fst0 = float32_abs(fst0);                         \
> target/mips/tcg/fpu_helper.c:1861:    fst1 = float32_abs(fst1);                         \
> target/mips/tcg/fpu_helper.c:1950:    fst0 = float32_abs(fdt0 & 
> 0XFFFFFFFF);                      \
> target/mips/tcg/fpu_helper.c:1951:    fsth0 = float32_abs(fdt0 >> 32); 
>                           \
> target/mips/tcg/fpu_helper.c:1952:    fst1 = float32_abs(fdt1 & 
> 0XFFFFFFFF);                      \
> target/mips/tcg/fpu_helper.c:1953:    fsth1 = float32_abs(fdt1 >> 32); 
>                           \
> target/ppc/fpu_helper.c:44:        return float32_chs(a);
> target/ppc/int_helper.c:694:            float32 bneg = float32_chs(b->f32[i]);
> target/s390x/tcg/vec_fpu_helper.c:922:                a = float32_abs(a);
> target/s390x/tcg/vec_fpu_helper.c:923:                b = float32_abs(b);
> target/s390x/tcg/vec_fpu_helper.c:984:                a = float64_abs(a);
> target/s390x/tcg/vec_fpu_helper.c:985:                b = float64_abs(b);
> target/s390x/tcg/vec_fpu_helper.c:1042:            a = float128_abs(a);
> target/s390x/tcg/vec_fpu_helper.c:1043:            b = float128_abs(b);
> target/sparc/fop_helper.c:119:    return float32_chs(src);
> target/sparc/fop_helper.c:125:    return float64_chs(src);
> target/sparc/fop_helper.c:130:    QT0 = float128_chs(QT1);
> target/sparc/fop_helper.c:234:    return float32_abs(src);
> target/sparc/fop_helper.c:240:    return float64_abs(src);
> target/sparc/fop_helper.c:245:    QT0 = float128_abs(QT1);
> target/xtensa/fpu_helper.c:126:    return float64_abs(v);
> target/xtensa/fpu_helper.c:131:    return float32_abs(v);
> target/xtensa/fpu_helper.c:136:    return float64_chs(v);
> target/xtensa/fpu_helper.c:141:    return float32_chs(v);

With only a few exceptions, most of these results are part of a larger out-of-line 
operation.


r~

Re: [PATCH 05/22] target/sparc: Inline FNEG, FABS

Posted by Philippe Mathieu-Daudé 10 months ago

On 3/11/23 18:38, Richard Henderson wrote:
> These are simple bit manipulation insns.
> Begin using i128 for float128.
> Implement FMOVq with do_qq.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/sparc/helper.h     |  6 ----
>   target/sparc/fop_helper.c | 34 ---------------------
>   target/sparc/translate.c  | 62 +++++++++++++++++++--------------------
>   3 files changed, 30 insertions(+), 72 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>