The existing f{32,64}_addsub_post test, which checks for zero
inputs, is identical to f{32,64}_mul_fast_test, so we can
eliminate the fast_test/fast_op hooks in favor of reusing the
same post hook.

This leaves one fewer test along the fast path for multiply.
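
For reference, the surviving hook is just the both-inputs-zero
check (f32 shown; the f64 version is identical in shape).
Returning true sends the operation back to the soft path:

    static bool f32_addsubmul_post(union_float32 a, union_float32 b)
    {
        /* Go soft unless both inputs are zero: a both-zero input
         * guarantees the tiny hardware result is exact. */
        if (QEMU_HARDFLOAT_2F32_USE_FP) {
            return !(fpclassify(a.h) == FP_ZERO &&
                     fpclassify(b.h) == FP_ZERO);
        }
        return !(float32_is_zero(a.s) && float32_is_zero(b.s));
    }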
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 65 +++++++++++--------------------------------------
1 file changed, 14 insertions(+), 51 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index a362bf89ca..5fb4ef75bb 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -339,12 +339,10 @@ static inline bool f64_is_inf(union_float64 a)
return float64_is_infinity(a.s);
}
-/* Note: @fast_test and @post can be NULL */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
hard_f32_op2_fn hard, soft_f32_op2_fn soft,
- f32_check_fn pre, f32_check_fn post,
- f32_check_fn fast_test, soft_f32_op2_fn fast_op)
+ f32_check_fn pre, f32_check_fn post)
{
union_float32 ua, ub, ur;
@@ -359,17 +357,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
if (unlikely(!pre(ua, ub))) {
goto soft;
}
- if (fast_test && fast_test(ua, ub)) {
- return fast_op(ua.s, ub.s, s);
- }
ur.h = hard(ua.h, ub.h);
if (unlikely(f32_is_inf(ur))) {
s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
- if (post == NULL || post(ua, ub)) {
- goto soft;
- }
+ } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
+ goto soft;
}
return ur.s;
@@ -380,8 +373,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
hard_f64_op2_fn hard, soft_f64_op2_fn soft,
- f64_check_fn pre, f64_check_fn post,
- f64_check_fn fast_test, soft_f64_op2_fn fast_op)
+ f64_check_fn pre, f64_check_fn post)
{
union_float64 ua, ub, ur;
@@ -396,17 +388,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
if (unlikely(!pre(ua, ub))) {
goto soft;
}
- if (fast_test && fast_test(ua, ub)) {
- return fast_op(ua.s, ub.s, s);
- }
ur.h = hard(ua.h, ub.h);
if (unlikely(f64_is_inf(ur))) {
s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
- if (post == NULL || post(ua, ub)) {
- goto soft;
- }
+ } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
+ goto soft;
}
return ur.s;
@@ -1115,7 +1102,7 @@ static double hard_f64_sub(double a, double b)
return a - b;
}
-static bool f32_addsub_post(union_float32 a, union_float32 b)
+static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
if (QEMU_HARDFLOAT_2F32_USE_FP) {
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@@ -1123,7 +1110,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b)
return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}
-static bool f64_addsub_post(union_float64 a, union_float64 b)
+static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
if (QEMU_HARDFLOAT_2F64_USE_FP) {
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@@ -1136,14 +1123,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s,
hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
return float32_gen2(a, b, s, hard, soft,
- f32_is_zon2, f32_addsub_post, NULL, NULL);
+ f32_is_zon2, f32_addsubmul_post);
}
static float64 float64_addsub(float64 a, float64 b, float_status *s,
hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
return float64_gen2(a, b, s, hard, soft,
- f64_is_zon2, f64_addsub_post, NULL, NULL);
+ f64_is_zon2, f64_addsubmul_post);
}
float32 QEMU_FLATTEN
@@ -1258,42 +1245,18 @@ static double hard_f64_mul(double a, double b)
return a * b;
}
-static bool f32_mul_fast_test(union_float32 a, union_float32 b)
-{
- return float32_is_zero(a.s) || float32_is_zero(b.s);
-}
-
-static bool f64_mul_fast_test(union_float64 a, union_float64 b)
-{
- return float64_is_zero(a.s) || float64_is_zero(b.s);
-}
-
-static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
-{
- bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
-
- return float32_set_sign(float32_zero, signbit);
-}
-
-static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
-{
- bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
-
- return float64_set_sign(float64_zero, signbit);
-}
-
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
- f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
+ f32_is_zon2, f32_addsubmul_post);
}
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
- f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
+ f64_is_zon2, f64_addsubmul_post);
}
/*
@@ -1834,14 +1797,14 @@ float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
- f32_div_pre, f32_div_post, NULL, NULL);
+ f32_div_pre, f32_div_post);
}
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
- f64_div_pre, f64_div_post, NULL, NULL);
+ f64_div_pre, f64_div_post);
}
/*
--
2.20.1
Richard Henderson <richard.henderson@linaro.org> writes:
> The existing f{32,64}_addsub_post test, which checks for zero
> inputs, is identical to f{32,64}_mul_fast_test, so we can
> eliminate the fast_test/fast_op hooks in favor of reusing the
> same post hook.
>
> This leaves one fewer test along the fast path for multiply.
I was worried that we were missing an opportunity by dropping the
early fast path, but fp-bench disagrees.
Before:
  ./fp-bench -o mul
  139.01 MFlops
After:
  ./fp-bench -o mul
  155.28 MFlops
So:
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
--
Alex Bennée