fpu/softfloat.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-)
This merges the int_to_float routine and the round_pack_canonical
routine into the same function, allowing the FloatParts structure
to be decomposed by the compiler.
This results in a 60-75% speedup of the flattened function.
Leave the narrower integer inputs to tail-call the int64_t version.
Buglink: https://bugs.launchpad.net/qemu/+bug/1892081
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 7b6aee9323..2cbcf5bf10 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2794,7 +2794,8 @@ static FloatParts int_to_float(int64_t a, int scale, float_status *status)
     return r;
 }
 
-float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
+float16 QEMU_FLATTEN
+int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
 {
     FloatParts pa = int_to_float(a, scale, status);
     return float16_round_pack_canonical(pa, status);
@@ -2830,7 +2831,8 @@ float16 int8_to_float16(int8_t a, float_status *status)
     return int64_to_float16_scalbn(a, 0, status);
 }
 
-float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
+float32 QEMU_FLATTEN
+int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
 {
     FloatParts pa = int_to_float(a, scale, status);
     return float32_round_pack_canonical(pa, status);
@@ -2861,7 +2863,8 @@ float32 int16_to_float32(int16_t a, float_status *status)
     return int64_to_float32_scalbn(a, 0, status);
 }
 
-float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
+float64 QEMU_FLATTEN
+int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
 {
     FloatParts pa = int_to_float(a, scale, status);
     return float64_round_pack_canonical(pa, status);
@@ -2897,7 +2900,8 @@ float64 int16_to_float64(int16_t a, float_status *status)
  * to the bfloat16 format.
  */
 
-bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
+bfloat16 QEMU_FLATTEN
+int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
 {
     FloatParts pa = int_to_float(a, scale, status);
     return bfloat16_round_pack_canonical(pa, status);
@@ -2959,7 +2963,8 @@ static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
     return r;
 }
 
-float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
+float16 QEMU_FLATTEN
+uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
 {
     FloatParts pa = uint_to_float(a, scale, status);
     return float16_round_pack_canonical(pa, status);
@@ -2995,7 +3000,8 @@ float16 uint8_to_float16(uint8_t a, float_status *status)
     return uint64_to_float16_scalbn(a, 0, status);
 }
 
-float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
+float32 QEMU_FLATTEN
+uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
 {
     FloatParts pa = uint_to_float(a, scale, status);
     return float32_round_pack_canonical(pa, status);
@@ -3026,7 +3032,8 @@ float32 uint16_to_float32(uint16_t a, float_status *status)
     return uint64_to_float32_scalbn(a, 0, status);
 }
 
-float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
+float64 QEMU_FLATTEN
+uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
 {
     FloatParts pa = uint_to_float(a, scale, status);
     return float64_round_pack_canonical(pa, status);
@@ -3062,7 +3069,8 @@ float64 uint16_to_float64(uint16_t a, float_status *status)
  * bfloat16 format.
  */
 
-bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
+bfloat16 QEMU_FLATTEN
+uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
 {
     FloatParts pa = uint_to_float(a, scale, status);
     return bfloat16_round_pack_canonical(pa, status);
--
2.25.1
Patchew URL: https://patchew.org/QEMU/20201018203334.1229243-1-richard.henderson@linaro.org/

Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20201018203334.1229243-1-richard.henderson@linaro.org
Subject: [PATCH] softfloat: Mark base int-to-float routines QEMU_FLATTEN

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
4ffd616 softfloat: Mark base int-to-float routines QEMU_FLATTEN

=== OUTPUT BEGIN ===
ERROR: spaces required around that '*' (ctx:WxV)
#29: FILE: fpu/softfloat.c:2779:
+int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
                                                            ^

ERROR: spaces required around that '*' (ctx:WxV)
#39: FILE: fpu/softfloat.c:2816:
+int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
                                                            ^

ERROR: spaces required around that '*' (ctx:WxV)
#49: FILE: fpu/softfloat.c:2848:
+int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
                                                            ^

ERROR: spaces required around that '*' (ctx:WxV)
#59: FILE: fpu/softfloat.c:2885:
+int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
                                                             ^

ERROR: spaces required around that '*' (ctx:WxV)
#69: FILE: fpu/softfloat.c:2948:
+uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
                                                              ^

ERROR: spaces required around that '*' (ctx:WxV)
#79: FILE: fpu/softfloat.c:2985:
+uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
                                                              ^

ERROR: spaces required around that '*' (ctx:WxV)
#89: FILE: fpu/softfloat.c:3017:
+uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
                                                              ^

ERROR: spaces required around that '*' (ctx:WxV)
#99: FILE: fpu/softfloat.c:3054:
+uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
                                                               ^

total: 8 errors, 0 warnings, 72 lines checked

Commit 4ffd616f4c5d (softfloat: Mark base int-to-float routines QEMU_FLATTEN) has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1

The full log is available at
http://patchew.org/logs/20201018203334.1229243-1-richard.henderson@linaro.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
Richard Henderson <richard.henderson@linaro.org> writes:

> This merges the int_to_float routine and the round_pack_canonical
> routine into the same function, allowing the FloatParts structure
> to be decomposed by the compiler.
>
> This results in a 60-75% speedup of the flattened function.
>
> Leave the narrower integer inputs to tail-call the int64_t version.
>
> Buglink: https://bugs.launchpad.net/qemu/+bug/1892081
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

--
Alex Bennée
On 10/18/20 10:33 PM, Richard Henderson wrote:
> This merges the int_to_float routine and the round_pack_canonical
> routine into the same function, allowing the FloatParts structure
> to be decomposed by the compiler.
>
> This results in a 60-75% speedup of the flattened function.
>
> Leave the narrower integer inputs to tail-call the int64_t version.
>

Reported-by: Ahmed Karaman <ahmedkrmn@outlook.com>

> Buglink: https://bugs.launchpad.net/qemu/+bug/1892081
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> ---
>  fpu/softfloat.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 7b6aee9323..2cbcf5bf10 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -2794,7 +2794,8 @@ static FloatParts int_to_float(int64_t a, int scale, float_status *status)
>      return r;
>  }
>
> -float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
> +float16 QEMU_FLATTEN
> +int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = int_to_float(a, scale, status);
>      return float16_round_pack_canonical(pa, status);
> @@ -2830,7 +2831,8 @@ float16 int8_to_float16(int8_t a, float_status *status)
>      return int64_to_float16_scalbn(a, 0, status);
>  }
>
> -float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
> +float32 QEMU_FLATTEN
> +int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = int_to_float(a, scale, status);
>      return float32_round_pack_canonical(pa, status);
> @@ -2861,7 +2863,8 @@ float32 int16_to_float32(int16_t a, float_status *status)
>      return int64_to_float32_scalbn(a, 0, status);
>  }
>
> -float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
> +float64 QEMU_FLATTEN
> +int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = int_to_float(a, scale, status);
>      return float64_round_pack_canonical(pa, status);
> @@ -2897,7 +2900,8 @@ float64 int16_to_float64(int16_t a, float_status *status)
>   * to the bfloat16 format.
>   */
>
> -bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
> +bfloat16 QEMU_FLATTEN
> +int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = int_to_float(a, scale, status);
>      return bfloat16_round_pack_canonical(pa, status);
> @@ -2959,7 +2963,8 @@ static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
>      return r;
>  }
>
> -float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
> +float16 QEMU_FLATTEN
> +uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = uint_to_float(a, scale, status);
>      return float16_round_pack_canonical(pa, status);
> @@ -2995,7 +3000,8 @@ float16 uint8_to_float16(uint8_t a, float_status *status)
>      return uint64_to_float16_scalbn(a, 0, status);
>  }
>
> -float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
> +float32 QEMU_FLATTEN
> +uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = uint_to_float(a, scale, status);
>      return float32_round_pack_canonical(pa, status);
> @@ -3026,7 +3032,8 @@ float32 uint16_to_float32(uint16_t a, float_status *status)
>      return uint64_to_float32_scalbn(a, 0, status);
>  }
>
> -float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
> +float64 QEMU_FLATTEN
> +uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = uint_to_float(a, scale, status);
>      return float64_round_pack_canonical(pa, status);
> @@ -3062,7 +3069,8 @@ float64 uint16_to_float64(uint16_t a, float_status *status)
>   * bfloat16 format.
>   */
>
> -bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
> +bfloat16 QEMU_FLATTEN
> +uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
>  {
>      FloatParts pa = uint_to_float(a, scale, status);
>      return bfloat16_round_pack_canonical(pa, status);
>
© 2016 - 2024 Red Hat, Inc.