These are simple bit manipulation insns.
Begin using i128 for float128.
Implement FMOVq with do_qq.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/sparc/helper.h | 6 ----
target/sparc/fop_helper.c | 34 ---------------------
target/sparc/translate.c | 62 +++++++++++++++++++--------------------
3 files changed, 30 insertions(+), 72 deletions(-)
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 55eff66283..74a1575d21 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -37,7 +37,6 @@ DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32)
#endif
DEF_HELPER_FLAGS_1(check_ieee_exceptions, TCG_CALL_NO_WG, tl, env)
DEF_HELPER_FLAGS_2(set_fsr, TCG_CALL_NO_RWG, void, env, tl)
-DEF_HELPER_FLAGS_1(fabss, TCG_CALL_NO_RWG_SE, f32, f32)
DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_RWG, f32, env, f32)
DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_RWG, f64, env, f64)
DEF_HELPER_FLAGS_3(fcmps, TCG_CALL_NO_WG, tl, env, f32, f32)
@@ -48,7 +47,6 @@ DEF_HELPER_FLAGS_1(fsqrtq, TCG_CALL_NO_RWG, void, env)
DEF_HELPER_FLAGS_1(fcmpq, TCG_CALL_NO_WG, tl, env)
DEF_HELPER_FLAGS_1(fcmpeq, TCG_CALL_NO_WG, tl, env)
#ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_1(fabsd, TCG_CALL_NO_RWG_SE, f64, f64)
DEF_HELPER_FLAGS_3(fcmps_fcc1, TCG_CALL_NO_WG, tl, env, f32, f32)
DEF_HELPER_FLAGS_3(fcmps_fcc2, TCG_CALL_NO_WG, tl, env, f32, f32)
DEF_HELPER_FLAGS_3(fcmps_fcc3, TCG_CALL_NO_WG, tl, env, f32, f32)
@@ -61,7 +59,6 @@ DEF_HELPER_FLAGS_3(fcmpes_fcc3, TCG_CALL_NO_WG, tl, env, f32, f32)
DEF_HELPER_FLAGS_3(fcmped_fcc1, TCG_CALL_NO_WG, tl, env, f64, f64)
DEF_HELPER_FLAGS_3(fcmped_fcc2, TCG_CALL_NO_WG, tl, env, f64, f64)
DEF_HELPER_FLAGS_3(fcmped_fcc3, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_1(fabsq, TCG_CALL_NO_RWG, void, env)
DEF_HELPER_FLAGS_1(fcmpq_fcc1, TCG_CALL_NO_WG, tl, env)
DEF_HELPER_FLAGS_1(fcmpq_fcc2, TCG_CALL_NO_WG, tl, env)
DEF_HELPER_FLAGS_1(fcmpq_fcc3, TCG_CALL_NO_WG, tl, env)
@@ -90,15 +87,12 @@ DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_RWG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_RWG, f64, env, f32, f32)
DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_RWG, void, env, f64, f64)
-DEF_HELPER_FLAGS_1(fnegs, TCG_CALL_NO_RWG_SE, f32, f32)
DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_RWG_SE, f64, env, s32)
DEF_HELPER_FLAGS_2(fitoq, TCG_CALL_NO_RWG, void, env, s32)
DEF_HELPER_FLAGS_2(fitos, TCG_CALL_NO_RWG, f32, env, s32)
#ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_1(fnegd, TCG_CALL_NO_RWG_SE, f64, f64)
-DEF_HELPER_FLAGS_1(fnegq, TCG_CALL_NO_RWG, void, env)
DEF_HELPER_FLAGS_2(fxtos, TCG_CALL_NO_RWG, f32, env, s64)
DEF_HELPER_FLAGS_2(fxtod, TCG_CALL_NO_RWG, f64, env, s64)
DEF_HELPER_FLAGS_2(fxtoq, TCG_CALL_NO_RWG, void, env, s64)
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 0f8aa3abcd..d6fb769769 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -114,23 +114,6 @@ void helper_fdmulq(CPUSPARCState *env, float64 src1, float64 src2)
&env->fp_status);
}
-float32 helper_fnegs(float32 src)
-{
- return float32_chs(src);
-}
-
-#ifdef TARGET_SPARC64
-float64 helper_fnegd(float64 src)
-{
- return float64_chs(src);
-}
-
-F_HELPER(neg, q)
-{
- QT0 = float128_chs(QT1);
-}
-#endif
-
/* Integer to float conversion. */
float32 helper_fitos(CPUSPARCState *env, int32_t src)
{
@@ -229,23 +212,6 @@ int64_t helper_fqtox(CPUSPARCState *env)
}
#endif
-float32 helper_fabss(float32 src)
-{
- return float32_abs(src);
-}
-
-#ifdef TARGET_SPARC64
-float64 helper_fabsd(float64 src)
-{
- return float64_abs(src);
-}
-
-void helper_fabsq(CPUSPARCState *env)
-{
- QT0 = float128_abs(QT1);
-}
-#endif
-
float32 helper_fsqrts(CPUSPARCState *env, float32 src)
{
return float32_sqrt(src, &env->fp_status);
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 0e494d3ebd..254f185b83 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -43,9 +43,7 @@
#else
# define gen_helper_clear_softint(E, S) qemu_build_not_reached()
# define gen_helper_done(E) qemu_build_not_reached()
-# define gen_helper_fabsd(D, S) qemu_build_not_reached()
# define gen_helper_flushw(E) qemu_build_not_reached()
-# define gen_helper_fnegd(D, S) qemu_build_not_reached()
# define gen_helper_rdccr(D, E) qemu_build_not_reached()
# define gen_helper_rdcwp(D, E) qemu_build_not_reached()
# define gen_helper_restored(E) qemu_build_not_reached()
@@ -61,7 +59,6 @@
# define gen_helper_write_softint(E, S) qemu_build_not_reached()
# define gen_helper_wrpil(E, S) qemu_build_not_reached()
# define gen_helper_wrpstate(E, S) qemu_build_not_reached()
-# define gen_helper_fabsq ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpeq16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpeq32 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpgt16 ({ qemu_build_not_reached(); NULL; })
@@ -79,7 +76,6 @@
# define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fmuld8sux16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fmuld8ulx16 ({ qemu_build_not_reached(); NULL; })
-# define gen_helper_fnegq ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fpmerge ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fqtox ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fstox ({ qemu_build_not_reached(); NULL; })
@@ -1239,13 +1235,13 @@ static void gen_op_fmovs(TCGv_i32 dst, TCGv_i32 src)
static void gen_op_fnegs(TCGv_i32 dst, TCGv_i32 src)
{
gen_op_clear_ieee_excp_and_FTT();
- gen_helper_fnegs(dst, src);
+ tcg_gen_xori_i32(dst, src, 1u << 31);
}
static void gen_op_fabss(TCGv_i32 dst, TCGv_i32 src)
{
gen_op_clear_ieee_excp_and_FTT();
- gen_helper_fabss(dst, src);
+ tcg_gen_andi_i32(dst, src, ~(1u << 31));
}
static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
@@ -1257,13 +1253,33 @@ static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
static void gen_op_fnegd(TCGv_i64 dst, TCGv_i64 src)
{
gen_op_clear_ieee_excp_and_FTT();
- gen_helper_fnegd(dst, src);
+ tcg_gen_xori_i64(dst, src, 1ull << 63);
}
static void gen_op_fabsd(TCGv_i64 dst, TCGv_i64 src)
{
gen_op_clear_ieee_excp_and_FTT();
- gen_helper_fabsd(dst, src);
+ tcg_gen_andi_i64(dst, src, ~(1ull << 63));
+}
+
+static void gen_op_fnegq(TCGv_i128 dst, TCGv_i128 src)
+{
+ TCGv_i64 l = tcg_temp_new_i64();
+ TCGv_i64 h = tcg_temp_new_i64();
+
+ tcg_gen_extr_i128_i64(l, h, src);
+ tcg_gen_xori_i64(h, h, 1ull << 63);
+ tcg_gen_concat_i64_i128(dst, l, h);
+}
+
+static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
+{
+ TCGv_i64 l = tcg_temp_new_i64();
+ TCGv_i64 h = tcg_temp_new_i64();
+
+ tcg_gen_extr_i128_i64(l, h, src);
+ tcg_gen_andi_i64(h, h, ~(1ull << 63));
+ tcg_gen_concat_i64_i128(dst, l, h);
}
#ifdef TARGET_SPARC64
@@ -4629,13 +4645,11 @@ TRANS(FiTOd, ALL, do_env_df, a, gen_helper_fitod)
TRANS(FsTOd, ALL, do_env_df, a, gen_helper_fstod)
TRANS(FsTOx, 64, do_env_df, a, gen_helper_fstox)
-static bool trans_FMOVq(DisasContext *dc, arg_FMOVq *a)
+static bool do_qq(DisasContext *dc, arg_r_r *a,
+ void (*func)(TCGv_i128, TCGv_i128))
{
TCGv_i128 t;
- if (!avail_64(dc)) {
- return false;
- }
if (gen_trap_ifnofpu(dc)) {
return true;
}
@@ -4645,30 +4659,14 @@ static bool trans_FMOVq(DisasContext *dc, arg_FMOVq *a)
gen_op_clear_ieee_excp_and_FTT();
t = gen_load_fpr_Q(dc, a->rs);
+ func(t, t);
gen_store_fpr_Q(dc, a->rd, t);
return advance_pc(dc);
}
-static bool do_qq(DisasContext *dc, arg_r_r *a,
- void (*func)(TCGv_env))
-{
- if (gen_trap_ifnofpu(dc)) {
- return true;
- }
- if (gen_trap_float128(dc)) {
- return true;
- }
-
- gen_op_clear_ieee_excp_and_FTT();
- gen_op_load_fpr_QT1(QFPREG(a->rs));
- func(tcg_env);
- gen_op_store_QT0_fpr(QFPREG(a->rd));
- gen_update_fprs_dirty(dc, QFPREG(a->rd));
- return advance_pc(dc);
-}
-
-TRANS(FNEGq, 64, do_qq, a, gen_helper_fnegq)
-TRANS(FABSq, 64, do_qq, a, gen_helper_fabsq)
+TRANS(FMOVq, 64, do_qq, a, tcg_gen_mov_i128)
+TRANS(FNEGq, 64, do_qq, a, gen_op_fnegq)
+TRANS(FABSq, 64, do_qq, a, gen_op_fabsq)
static bool do_env_qq(DisasContext *dc, arg_r_r *a,
void (*func)(TCGv_env))
--
2.34.1
Hi Richard, On 3/11/23 18:38, Richard Henderson wrote: > These are simple bit manipulation insns. > Begin using i128 for float128. > Implement FMOVq with do_qq. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/sparc/helper.h | 6 ---- > target/sparc/fop_helper.c | 34 --------------------- > target/sparc/translate.c | 62 +++++++++++++++++++-------------------- > 3 files changed, 30 insertions(+), 72 deletions(-) > @@ -1239,13 +1235,13 @@ static void gen_op_fmovs(TCGv_i32 dst, TCGv_i32 src) > static void gen_op_fnegs(TCGv_i32 dst, TCGv_i32 src) > { > gen_op_clear_ieee_excp_and_FTT(); > - gen_helper_fnegs(dst, src); > + tcg_gen_xori_i32(dst, src, 1u << 31); > } > > static void gen_op_fabss(TCGv_i32 dst, TCGv_i32 src) > { > gen_op_clear_ieee_excp_and_FTT(); > - gen_helper_fabss(dst, src); > + tcg_gen_andi_i32(dst, src, ~(1u << 31)); > } > > static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src) > @@ -1257,13 +1253,33 @@ static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src) > static void gen_op_fnegd(TCGv_i64 dst, TCGv_i64 src) > { > gen_op_clear_ieee_excp_and_FTT(); > - gen_helper_fnegd(dst, src); > + tcg_gen_xori_i64(dst, src, 1ull << 63); > } > > static void gen_op_fabsd(TCGv_i64 dst, TCGv_i64 src) > { > gen_op_clear_ieee_excp_and_FTT(); > - gen_helper_fabsd(dst, src); > + tcg_gen_andi_i64(dst, src, ~(1ull << 63)); > +} > + > +static void gen_op_fnegq(TCGv_i128 dst, TCGv_i128 src) > +{ > + TCGv_i64 l = tcg_temp_new_i64(); > + TCGv_i64 h = tcg_temp_new_i64(); > + > + tcg_gen_extr_i128_i64(l, h, src); > + tcg_gen_xori_i64(h, h, 1ull << 63); > + tcg_gen_concat_i64_i128(dst, l, h); > +} > + > +static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src) > +{ > + TCGv_i64 l = tcg_temp_new_i64(); > + TCGv_i64 h = tcg_temp_new_i64(); > + > + tcg_gen_extr_i128_i64(l, h, src); > + tcg_gen_andi_i64(h, h, ~(1ull << 63)); > + tcg_gen_concat_i64_i128(dst, l, h); > } Why not extract these as generic TCG FPU helpers? 
$ git grep -wE 'float...?_(chs|abs)' target/ target/arm/tcg/helper-a64.c:214: a = float16_chs(a); target/arm/tcg/helper-a64.c:229: a = float32_chs(a); target/arm/tcg/helper-a64.c:244: a = float64_chs(a); target/arm/tcg/helper-a64.c:259: a = float16_chs(a); target/arm/tcg/helper-a64.c:274: a = float32_chs(a); target/arm/tcg/helper-a64.c:289: a = float64_chs(a); target/arm/tcg/helper-a64.c:632: float16 f0 = float16_abs(a); target/arm/tcg/helper-a64.c:633: float16 f1 = float16_abs(b); target/arm/tcg/helper-a64.c:642: float16 f0 = float16_abs(a); target/arm/tcg/helper-a64.c:643: float16 f1 = float16_abs(b); target/arm/tcg/mve_helper.c:2840: return float16_abs(float16_sub(a, b, s)); target/arm/tcg/mve_helper.c:2845: return float32_abs(float32_sub(a, b, s)); target/arm/tcg/mve_helper.c:2854: return float16_maxnum(float16_abs(a), float16_abs(b), s); target/arm/tcg/mve_helper.c:2859: return float32_maxnum(float32_abs(a), float32_abs(b), s); target/arm/tcg/mve_helper.c:2864: return float16_minnum(float16_abs(a), float16_abs(b), s); target/arm/tcg/mve_helper.c:2869: return float32_minnum(float32_abs(a), float32_abs(b), s); target/arm/tcg/neon_helper.c:1513: float32 f0 = float32_abs(make_float32(a)); target/arm/tcg/neon_helper.c:1514: float32 f1 = float32_abs(make_float32(b)); target/arm/tcg/neon_helper.c:1521: float32 f0 = float32_abs(make_float32(a)); target/arm/tcg/neon_helper.c:1522: float32 f1 = float32_abs(make_float32(b)); target/arm/tcg/neon_helper.c:1529: float64 f0 = float64_abs(make_float64(a)); target/arm/tcg/neon_helper.c:1530: float64 f1 = float64_abs(make_float64(b)); target/arm/tcg/neon_helper.c:1537: float64 f0 = float64_abs(make_float64(a)); target/arm/tcg/neon_helper.c:1538: float64 f1 = float64_abs(make_float64(b)); target/arm/tcg/sve_helper.c:4227:DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) target/arm/tcg/sve_helper.c:4228:DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) 
target/arm/tcg/sve_helper.c:4229:DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) target/arm/tcg/sve_helper.c:4345: return float16_abs(float16_sub(a, b, s)); target/arm/tcg/sve_helper.c:4350: return float32_abs(float32_sub(a, b, s)); target/arm/tcg/sve_helper.c:4355: return float64_abs(float64_sub(a, b, s)); target/arm/tcg/sve_helper.c:4997: mm = float16_abs(mm); target/arm/tcg/sve_helper.c:5019: mm = float32_abs(mm); target/arm/tcg/sve_helper.c:5045: mm = float64_abs(mm); target/arm/tcg/sve_helper.c:5062: float16 neg_real = float16_chs(neg_imag); target/arm/tcg/sve_helper.c:5094: float32 neg_real = float32_chs(neg_imag); target/arm/tcg/sve_helper.c:5126: float64 neg_real = float64_chs(neg_imag); target/arm/tcg/vec_helper.c:996: return -float16_le(float16_abs(op2), float16_abs(op1), stat); target/arm/tcg/vec_helper.c:1001: return -float32_le(float32_abs(op2), float32_abs(op1), stat); target/arm/tcg/vec_helper.c:1006: return -float16_lt(float16_abs(op2), float16_abs(op1), stat); target/arm/tcg/vec_helper.c:1011: return -float32_lt(float32_abs(op2), float32_abs(op1), stat); target/arm/tcg/vec_helper.c:1124: return float16_abs(float16_sub(op1, op2, stat)); target/arm/tcg/vec_helper.c:1129: return float32_abs(float32_sub(op1, op2, stat)); target/arm/tcg/vec_helper.c:1304: return float16_muladd(float16_chs(op1), op2, dest, 0, stat); target/arm/tcg/vec_helper.c:1310: return float32_muladd(float32_chs(op1), op2, dest, 0, stat); target/arm/vfp_helper.c:286: return float16_chs(a); target/arm/vfp_helper.c:291: return float32_chs(a); target/arm/vfp_helper.c:296: return float64_chs(a); target/arm/vfp_helper.c:301: return float16_abs(a); target/arm/vfp_helper.c:306: return float32_abs(a); target/arm/vfp_helper.c:311: return float64_abs(a); target/arm/vfp_helper.c:688: } else if (float16_abs(f16) < (1 << 8)) { target/arm/vfp_helper.c:738: } else if (float32_abs(f32) < (1ULL << 21)) { target/arm/vfp_helper.c:1133: if (value == float64_chs(float64_zero)) { 
target/i386/tcg/fpu_helper.c:591: ST0 = floatx80_chs(ST0); target/i386/tcg/fpu_helper.c:596: ST0 = floatx80_abs(ST0); target/i386/tcg/fpu_helper.c:781: tmp = floatx80_chs(tmp); target/i386/tcg/fpu_helper.c:1739: ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, target/i386/tcg/fpu_helper.c:2104: ST1 = floatx80_chs(ST1); target/i386/tcg/fpu_helper.c:2119: ST1 = floatx80_chs(ST0); target/i386/tcg/fpu_helper.c:2135: ST1 = floatx80_chs(ST1); target/i386/tcg/fpu_helper.c:2140: ST1 = floatx80_chs(floatx80_zero); target/i386/tcg/fpu_helper.c:2276: floatx80_chs(floatx80_zero) : target/i386/tcg/fpu_helper.c:2285: floatx80_chs(floatx80_infinity) : target/m68k/fpu_helper.c:212: res->d = floatx80_round(floatx80_abs(val->d), &env->fp_status); target/m68k/fpu_helper.c:218: res->d = floatx80_round(floatx80_abs(val->d), &env->fp_status); target/m68k/fpu_helper.c:225: res->d = floatx80_round(floatx80_abs(val->d), &env->fp_status); target/m68k/fpu_helper.c:231: res->d = floatx80_round(floatx80_chs(val->d), &env->fp_status); target/m68k/fpu_helper.c:237: res->d = floatx80_round(floatx80_chs(val->d), &env->fp_status); target/m68k/fpu_helper.c:244: res->d = floatx80_round(floatx80_chs(val->d), &env->fp_status); target/m68k/fpu_helper.c:557: quotient = floatx80_to_int32(floatx80_abs(fp_quot.d), &env->fp_status); target/m68k/softfloat.c:2714: fp0 = floatx80_abs(a); /* Y = |X| */ target/m68k/softfloat.c:2734: fp0 = floatx80_abs(a); /* Y = |X| */ target/mips/tcg/fpu_helper.c:977: return float64_abs(fdt0); target/mips/tcg/fpu_helper.c:982: return float32_abs(fst0); target/mips/tcg/fpu_helper.c:990: wt0 = float32_abs(fdt0 & 0XFFFFFFFF); target/mips/tcg/fpu_helper.c:991: wth0 = float32_abs(fdt0 >> 32); target/mips/tcg/fpu_helper.c:997: return float64_chs(fdt0); target/mips/tcg/fpu_helper.c:1002: return float32_chs(fst0); target/mips/tcg/fpu_helper.c:1010: wt0 = float32_chs(fdt0 & 0XFFFFFFFF); target/mips/tcg/fpu_helper.c:1011: wth0 = float32_chs(fdt0 >> 32); 
target/mips/tcg/fpu_helper.c:1365: fdt2 = float64_chs(float64_sub(fdt2, float64_one, target/mips/tcg/fpu_helper.c:1374: fst2 = float32_chs(float32_sub(fst2, float32_one, target/mips/tcg/fpu_helper.c:1389: fstl2 = float32_chs(float32_sub(fstl2, float32_one, target/mips/tcg/fpu_helper.c:1391: fsth2 = float32_chs(float32_sub(fsth2, float32_one, target/mips/tcg/fpu_helper.c:1401: fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64, target/mips/tcg/fpu_helper.c:1411: fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, target/mips/tcg/fpu_helper.c:1428: fstl2 = float32_chs(float32_div(fstl2, FLOAT_TWO32, target/mips/tcg/fpu_helper.c:1430: fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, target/mips/tcg/fpu_helper.c:1633: fst0 = float64_chs(fst0); target/mips/tcg/fpu_helper.c:1644: fst0 = float32_chs(fst0); target/mips/tcg/fpu_helper.c:1662: fstl0 = float32_chs(fstl0); target/mips/tcg/fpu_helper.c:1665: fsth0 = float32_chs(fsth0); target/mips/tcg/fpu_helper.c:1676: fst0 = float64_chs(fst0); target/mips/tcg/fpu_helper.c:1687: fst0 = float32_chs(fst0); target/mips/tcg/fpu_helper.c:1705: fstl0 = float32_chs(fstl0); target/mips/tcg/fpu_helper.c:1708: fsth0 = float32_chs(fsth0); target/mips/tcg/fpu_helper.c:1781: fdt0 = float64_abs(fdt0); \ target/mips/tcg/fpu_helper.c:1782: fdt1 = float64_abs(fdt1); \ target/mips/tcg/fpu_helper.c:1860: fst0 = float32_abs(fst0); \ target/mips/tcg/fpu_helper.c:1861: fst1 = float32_abs(fst1); \ target/mips/tcg/fpu_helper.c:1950: fst0 = float32_abs(fdt0 & 0XFFFFFFFF); \ target/mips/tcg/fpu_helper.c:1951: fsth0 = float32_abs(fdt0 >> 32); \ target/mips/tcg/fpu_helper.c:1952: fst1 = float32_abs(fdt1 & 0XFFFFFFFF); \ target/mips/tcg/fpu_helper.c:1953: fsth1 = float32_abs(fdt1 >> 32); \ target/ppc/fpu_helper.c:44: return float32_chs(a); target/ppc/int_helper.c:694: float32 bneg = float32_chs(b->f32[i]); target/s390x/tcg/vec_fpu_helper.c:922: a = float32_abs(a); target/s390x/tcg/vec_fpu_helper.c:923: b = float32_abs(b); 
target/s390x/tcg/vec_fpu_helper.c:984: a = float64_abs(a); target/s390x/tcg/vec_fpu_helper.c:985: b = float64_abs(b); target/s390x/tcg/vec_fpu_helper.c:1042: a = float128_abs(a); target/s390x/tcg/vec_fpu_helper.c:1043: b = float128_abs(b); target/sparc/fop_helper.c:119: return float32_chs(src); target/sparc/fop_helper.c:125: return float64_chs(src); target/sparc/fop_helper.c:130: QT0 = float128_chs(QT1); target/sparc/fop_helper.c:234: return float32_abs(src); target/sparc/fop_helper.c:240: return float64_abs(src); target/sparc/fop_helper.c:245: QT0 = float128_abs(QT1); target/xtensa/fpu_helper.c:126: return float64_abs(v); target/xtensa/fpu_helper.c:131: return float32_abs(v); target/xtensa/fpu_helper.c:136: return float64_chs(v); target/xtensa/fpu_helper.c:141: return float32_chs(v);
On 1/30/24 18:40, Philippe Mathieu-Daudé wrote: > Hi Richard, > > On 3/11/23 18:38, Richard Henderson wrote: >> These are simple bit manipulation insns. >> Begin using i128 for float128. >> Implement FMOVq with do_qq. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/sparc/helper.h | 6 ---- >> target/sparc/fop_helper.c | 34 --------------------- >> target/sparc/translate.c | 62 +++++++++++++++++++-------------------- >> 3 files changed, 30 insertions(+), 72 deletions(-) > > >> @@ -1239,13 +1235,13 @@ static void gen_op_fmovs(TCGv_i32 dst, TCGv_i32 src) >> static void gen_op_fnegs(TCGv_i32 dst, TCGv_i32 src) >> { >> gen_op_clear_ieee_excp_and_FTT(); >> - gen_helper_fnegs(dst, src); >> + tcg_gen_xori_i32(dst, src, 1u << 31); >> } >> static void gen_op_fabss(TCGv_i32 dst, TCGv_i32 src) >> { >> gen_op_clear_ieee_excp_and_FTT(); >> - gen_helper_fabss(dst, src); >> + tcg_gen_andi_i32(dst, src, ~(1u << 31)); >> } >> static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src) >> @@ -1257,13 +1253,33 @@ static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src) >> static void gen_op_fnegd(TCGv_i64 dst, TCGv_i64 src) >> { >> gen_op_clear_ieee_excp_and_FTT(); >> - gen_helper_fnegd(dst, src); >> + tcg_gen_xori_i64(dst, src, 1ull << 63); >> } >> static void gen_op_fabsd(TCGv_i64 dst, TCGv_i64 src) >> { >> gen_op_clear_ieee_excp_and_FTT(); >> - gen_helper_fabsd(dst, src); >> + tcg_gen_andi_i64(dst, src, ~(1ull << 63)); >> +} >> + >> +static void gen_op_fnegq(TCGv_i128 dst, TCGv_i128 src) >> +{ >> + TCGv_i64 l = tcg_temp_new_i64(); >> + TCGv_i64 h = tcg_temp_new_i64(); >> + >> + tcg_gen_extr_i128_i64(l, h, src); >> + tcg_gen_xori_i64(h, h, 1ull << 63); >> + tcg_gen_concat_i64_i128(dst, l, h); >> +} >> + >> +static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src) >> +{ >> + TCGv_i64 l = tcg_temp_new_i64(); >> + TCGv_i64 h = tcg_temp_new_i64(); >> + >> + tcg_gen_extr_i128_i64(l, h, src); >> + tcg_gen_andi_i64(h, h, ~(1ull << 63)); >> + 
tcg_gen_concat_i64_i128(dst, l, h); >> } > > Why not extract these as generic TCG FPU helpers? The representation of floating-point registers varies wildly between targets. Sparc would be the only one to (a) have float128 and (b) represent them in TCGv_i128. Even considering float32, is the representation TCGv_i32 or TCGv_i64? Should the result be nan-boxed (riscv and loongarch)? We already provide tcg_gen_xori_i{32,64}, so, really that's enough for any target. > $ git grep -wE 'float...?_(chs|abs)' target/ > target/arm/tcg/helper-a64.c:214: a = float16_chs(a); > target/arm/tcg/helper-a64.c:229: a = float32_chs(a); > target/arm/tcg/helper-a64.c:244: a = float64_chs(a); > target/arm/tcg/helper-a64.c:259: a = float16_chs(a); > target/arm/tcg/helper-a64.c:274: a = float32_chs(a); > target/arm/tcg/helper-a64.c:289: a = float64_chs(a); > target/arm/tcg/helper-a64.c:632: float16 f0 = float16_abs(a); > target/arm/tcg/helper-a64.c:633: float16 f1 = float16_abs(b); > target/arm/tcg/helper-a64.c:642: float16 f0 = float16_abs(a); > target/arm/tcg/helper-a64.c:643: float16 f1 = float16_abs(b); > target/arm/tcg/mve_helper.c:2840: return float16_abs(float16_sub(a, b, s)); > target/arm/tcg/mve_helper.c:2845: return float32_abs(float32_sub(a, b, s)); > target/arm/tcg/mve_helper.c:2854: return float16_maxnum(float16_abs(a), float16_abs(b), > s); > target/arm/tcg/mve_helper.c:2859: return float32_maxnum(float32_abs(a), float32_abs(b), > s); > target/arm/tcg/mve_helper.c:2864: return float16_minnum(float16_abs(a), float16_abs(b), > s); > target/arm/tcg/mve_helper.c:2869: return float32_minnum(float32_abs(a), float32_abs(b), > s); > target/arm/tcg/neon_helper.c:1513: float32 f0 = float32_abs(make_float32(a)); > target/arm/tcg/neon_helper.c:1514: float32 f1 = float32_abs(make_float32(b)); > target/arm/tcg/neon_helper.c:1521: float32 f0 = float32_abs(make_float32(a)); > target/arm/tcg/neon_helper.c:1522: float32 f1 = float32_abs(make_float32(b)); > target/arm/tcg/neon_helper.c:1529: 
float64 f0 = float64_abs(make_float64(a)); > target/arm/tcg/neon_helper.c:1530: float64 f1 = float64_abs(make_float64(b)); > target/arm/tcg/neon_helper.c:1537: float64 f0 = float64_abs(make_float64(a)); > target/arm/tcg/neon_helper.c:1538: float64 f1 = float64_abs(make_float64(b)); > target/arm/tcg/sve_helper.c:4227:DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, > float16_chs(float16_infinity)) > target/arm/tcg/sve_helper.c:4228:DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, > float32_chs(float32_infinity)) > target/arm/tcg/sve_helper.c:4229:DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, > float64_chs(float64_infinity)) > target/arm/tcg/sve_helper.c:4345: return float16_abs(float16_sub(a, b, s)); > target/arm/tcg/sve_helper.c:4350: return float32_abs(float32_sub(a, b, s)); > target/arm/tcg/sve_helper.c:4355: return float64_abs(float64_sub(a, b, s)); > target/arm/tcg/sve_helper.c:4997: mm = float16_abs(mm); > target/arm/tcg/sve_helper.c:5019: mm = float32_abs(mm); > target/arm/tcg/sve_helper.c:5045: mm = float64_abs(mm); > target/arm/tcg/sve_helper.c:5062: float16 neg_real = float16_chs(neg_imag); > target/arm/tcg/sve_helper.c:5094: float32 neg_real = float32_chs(neg_imag); > target/arm/tcg/sve_helper.c:5126: float64 neg_real = float64_chs(neg_imag); > target/arm/tcg/vec_helper.c:996: return -float16_le(float16_abs(op2), float16_abs(op1), > stat); > target/arm/tcg/vec_helper.c:1001: return -float32_le(float32_abs(op2), > float32_abs(op1), stat); > target/arm/tcg/vec_helper.c:1006: return -float16_lt(float16_abs(op2), > float16_abs(op1), stat); > target/arm/tcg/vec_helper.c:1011: return -float32_lt(float32_abs(op2), > float32_abs(op1), stat); > target/arm/tcg/vec_helper.c:1124: return float16_abs(float16_sub(op1, op2, stat)); > target/arm/tcg/vec_helper.c:1129: return float32_abs(float32_sub(op1, op2, stat)); > target/arm/tcg/vec_helper.c:1304: return float16_muladd(float16_chs(op1), op2, dest, 0, > stat); > target/arm/tcg/vec_helper.c:1310: return 
float32_muladd(float32_chs(op1), op2, dest, 0, > stat); > target/arm/vfp_helper.c:286: return float16_chs(a); > target/arm/vfp_helper.c:291: return float32_chs(a); > target/arm/vfp_helper.c:296: return float64_chs(a); > target/arm/vfp_helper.c:301: return float16_abs(a); > target/arm/vfp_helper.c:306: return float32_abs(a); > target/arm/vfp_helper.c:311: return float64_abs(a); > target/arm/vfp_helper.c:688: } else if (float16_abs(f16) < (1 << 8)) { > target/arm/vfp_helper.c:738: } else if (float32_abs(f32) < (1ULL << 21)) { > target/arm/vfp_helper.c:1133: if (value == float64_chs(float64_zero)) { > target/i386/tcg/fpu_helper.c:591: ST0 = floatx80_chs(ST0); > target/i386/tcg/fpu_helper.c:596: ST0 = floatx80_abs(ST0); > target/i386/tcg/fpu_helper.c:781: tmp = floatx80_chs(tmp); > target/i386/tcg/fpu_helper.c:1739: ST0 = floatx80_div(floatx80_chs(floatx80_one), > floatx80_zero, > target/i386/tcg/fpu_helper.c:2104: ST1 = floatx80_chs(ST1); > target/i386/tcg/fpu_helper.c:2119: ST1 = floatx80_chs(ST0); > target/i386/tcg/fpu_helper.c:2135: ST1 = floatx80_chs(ST1); > target/i386/tcg/fpu_helper.c:2140: ST1 = floatx80_chs(floatx80_zero); > target/i386/tcg/fpu_helper.c:2276: floatx80_chs(floatx80_zero) : > target/i386/tcg/fpu_helper.c:2285: floatx80_chs(floatx80_infinity) : > target/m68k/fpu_helper.c:212: res->d = floatx80_round(floatx80_abs(val->d), > &env->fp_status); > target/m68k/fpu_helper.c:218: res->d = floatx80_round(floatx80_abs(val->d), > &env->fp_status); > target/m68k/fpu_helper.c:225: res->d = floatx80_round(floatx80_abs(val->d), > &env->fp_status); > target/m68k/fpu_helper.c:231: res->d = floatx80_round(floatx80_chs(val->d), > &env->fp_status); > target/m68k/fpu_helper.c:237: res->d = floatx80_round(floatx80_chs(val->d), > &env->fp_status); > target/m68k/fpu_helper.c:244: res->d = floatx80_round(floatx80_chs(val->d), > &env->fp_status); > target/m68k/fpu_helper.c:557: quotient = floatx80_to_int32(floatx80_abs(fp_quot.d), > &env->fp_status); > 
target/m68k/softfloat.c:2714: fp0 = floatx80_abs(a); /* Y = |X| */ > target/m68k/softfloat.c:2734: fp0 = floatx80_abs(a); /* Y = |X| */ > target/mips/tcg/fpu_helper.c:977: return float64_abs(fdt0); > target/mips/tcg/fpu_helper.c:982: return float32_abs(fst0); > target/mips/tcg/fpu_helper.c:990: wt0 = float32_abs(fdt0 & 0XFFFFFFFF); > target/mips/tcg/fpu_helper.c:991: wth0 = float32_abs(fdt0 >> 32); > target/mips/tcg/fpu_helper.c:997: return float64_chs(fdt0); > target/mips/tcg/fpu_helper.c:1002: return float32_chs(fst0); > target/mips/tcg/fpu_helper.c:1010: wt0 = float32_chs(fdt0 & 0XFFFFFFFF); > target/mips/tcg/fpu_helper.c:1011: wth0 = float32_chs(fdt0 >> 32); > target/mips/tcg/fpu_helper.c:1365: fdt2 = float64_chs(float64_sub(fdt2, float64_one, > target/mips/tcg/fpu_helper.c:1374: fst2 = float32_chs(float32_sub(fst2, float32_one, > target/mips/tcg/fpu_helper.c:1389: fstl2 = float32_chs(float32_sub(fstl2, float32_one, > target/mips/tcg/fpu_helper.c:1391: fsth2 = float32_chs(float32_sub(fsth2, float32_one, > target/mips/tcg/fpu_helper.c:1401: fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64, > target/mips/tcg/fpu_helper.c:1411: fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, > target/mips/tcg/fpu_helper.c:1428: fstl2 = float32_chs(float32_div(fstl2, FLOAT_TWO32, > target/mips/tcg/fpu_helper.c:1430: fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, > target/mips/tcg/fpu_helper.c:1633: fst0 = float64_chs(fst0); > target/mips/tcg/fpu_helper.c:1644: fst0 = float32_chs(fst0); > target/mips/tcg/fpu_helper.c:1662: fstl0 = float32_chs(fstl0); > target/mips/tcg/fpu_helper.c:1665: fsth0 = float32_chs(fsth0); > target/mips/tcg/fpu_helper.c:1676: fst0 = float64_chs(fst0); > target/mips/tcg/fpu_helper.c:1687: fst0 = float32_chs(fst0); > target/mips/tcg/fpu_helper.c:1705: fstl0 = float32_chs(fstl0); > target/mips/tcg/fpu_helper.c:1708: fsth0 = float32_chs(fsth0); > target/mips/tcg/fpu_helper.c:1781: fdt0 = float64_abs(fdt0); \ > target/mips/tcg/fpu_helper.c:1782: fdt1 = 
float64_abs(fdt1); \ > target/mips/tcg/fpu_helper.c:1860: fst0 = float32_abs(fst0); \ > target/mips/tcg/fpu_helper.c:1861: fst1 = float32_abs(fst1); \ > target/mips/tcg/fpu_helper.c:1950: fst0 = float32_abs(fdt0 & > 0XFFFFFFFF); \ > target/mips/tcg/fpu_helper.c:1951: fsth0 = float32_abs(fdt0 >> 32); > \ > target/mips/tcg/fpu_helper.c:1952: fst1 = float32_abs(fdt1 & > 0XFFFFFFFF); \ > target/mips/tcg/fpu_helper.c:1953: fsth1 = float32_abs(fdt1 >> 32); > \ > target/ppc/fpu_helper.c:44: return float32_chs(a); > target/ppc/int_helper.c:694: float32 bneg = float32_chs(b->f32[i]); > target/s390x/tcg/vec_fpu_helper.c:922: a = float32_abs(a); > target/s390x/tcg/vec_fpu_helper.c:923: b = float32_abs(b); > target/s390x/tcg/vec_fpu_helper.c:984: a = float64_abs(a); > target/s390x/tcg/vec_fpu_helper.c:985: b = float64_abs(b); > target/s390x/tcg/vec_fpu_helper.c:1042: a = float128_abs(a); > target/s390x/tcg/vec_fpu_helper.c:1043: b = float128_abs(b); > target/sparc/fop_helper.c:119: return float32_chs(src); > target/sparc/fop_helper.c:125: return float64_chs(src); > target/sparc/fop_helper.c:130: QT0 = float128_chs(QT1); > target/sparc/fop_helper.c:234: return float32_abs(src); > target/sparc/fop_helper.c:240: return float64_abs(src); > target/sparc/fop_helper.c:245: QT0 = float128_abs(QT1); > target/xtensa/fpu_helper.c:126: return float64_abs(v); > target/xtensa/fpu_helper.c:131: return float32_abs(v); > target/xtensa/fpu_helper.c:136: return float64_chs(v); > target/xtensa/fpu_helper.c:141: return float32_chs(v); With only a few exceptions, most of these results are part of a larger out-of-line operation. r~
On 3/11/23 18:38, Richard Henderson wrote: > These are simple bit manipulation insns. > Begin using i128 for float128. > Implement FMOVq with do_qq. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/sparc/helper.h | 6 ---- > target/sparc/fop_helper.c | 34 --------------------- > target/sparc/translate.c | 62 +++++++++++++++++++-------------------- > 3 files changed, 30 insertions(+), 72 deletions(-) Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
© 2016 - 2024 Red Hat, Inc.