Create a new exception flag for reporting input denormals that are not
flushed to zero, they are normalized and treated as normal numbers.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-types.h | 15 ++++---
fpu/softfloat.c | 84 +++++++++++------------------------
fpu/softfloat-parts.c.inc | 1 +
3 files changed, 36 insertions(+), 64 deletions(-)
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index e2d70ff556..174100e50e 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -143,13 +143,14 @@ typedef enum __attribute__((__packed__)) {
*/
enum {
- float_flag_invalid = 1,
- float_flag_divbyzero = 4,
- float_flag_overflow = 8,
- float_flag_underflow = 16,
- float_flag_inexact = 32,
- float_flag_iflush_denormal = 64,
- float_flag_oflush_denormal = 128
+ float_flag_invalid = 0x0001,
+ float_flag_divbyzero = 0x0002,
+ float_flag_overflow = 0x0004,
+ float_flag_underflow = 0x0008,
+ float_flag_inexact = 0x0010,
+ float_flag_inorm_denormal = 0x0020, /* denormal input, normalized */
+ float_flag_iflush_denormal = 0x0040, /* denormal input, flushed to zero */
+ float_flag_oflush_denormal = 0x0080, /* denormal result, flushed to zero */
};
/*
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index cb077cf111..e54cdb274d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -126,61 +126,23 @@ this code that are retained.
* denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
* and the result is < the minimum normal.
*/
-#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
+
+#define GEN_INPUT_FLUSH(name, soft_t) \
static inline void name(soft_t *a, float_status *s) \
{ \
if (unlikely(soft_t ## _is_denormal(*a))) { \
- *a = soft_t ## _set_sign(soft_t ## _zero, \
- soft_t ## _is_neg(*a)); \
- float_raise(float_flag_iflush_denormal, s); \
+ if (s->flush_inputs_to_zero) { \
+ *a = soft_t ## _set_sign(0, soft_t ## _is_neg(*a)); \
+ float_raise(float_flag_iflush_denormal, s); \
+ } else { \
+ float_raise(float_flag_inorm_denormal, s); \
+ } \
} \
}
-GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
-GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
-#undef GEN_INPUT_FLUSH__NOCHECK
-
-#define GEN_INPUT_FLUSH1(name, soft_t) \
- static inline void name(soft_t *a, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- }
-
-GEN_INPUT_FLUSH1(float32_input_flush1, float32)
-GEN_INPUT_FLUSH1(float64_input_flush1, float64)
-#undef GEN_INPUT_FLUSH1
-
-#define GEN_INPUT_FLUSH2(name, soft_t) \
- static inline void name(soft_t *a, soft_t *b, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- soft_t ## _input_flush__nocheck(b, s); \
- }
-
-GEN_INPUT_FLUSH2(float32_input_flush2, float32)
-GEN_INPUT_FLUSH2(float64_input_flush2, float64)
-#undef GEN_INPUT_FLUSH2
-
-#define GEN_INPUT_FLUSH3(name, soft_t) \
- static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- soft_t ## _input_flush__nocheck(b, s); \
- soft_t ## _input_flush__nocheck(c, s); \
- }
-
-GEN_INPUT_FLUSH3(float32_input_flush3, float32)
-GEN_INPUT_FLUSH3(float64_input_flush3, float64)
-#undef GEN_INPUT_FLUSH3
+GEN_INPUT_FLUSH(float32_input_flush, float32)
+GEN_INPUT_FLUSH(float64_input_flush, float64)
+#undef GEN_INPUT_FLUSH
/*
* Choose whether to use fpclassify or float32/64_* primitives in the generated
@@ -353,7 +315,8 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
goto soft;
}
- float32_input_flush2(&ua.s, &ub.s, s);
+ float32_input_flush(&ua.s, s);
+ float32_input_flush(&ub.s, s);
if (unlikely(!pre(ua, ub))) {
goto soft;
}
@@ -384,7 +347,8 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
goto soft;
}
- float64_input_flush2(&ua.s, &ub.s, s);
+ float64_input_flush(&ua.s, s);
+ float64_input_flush(&ub.s, s);
if (unlikely(!pre(ua, ub))) {
goto soft;
}
@@ -2161,7 +2125,9 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
goto soft;
}
- float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
+ float32_input_flush(&ua.s, s);
+ float32_input_flush(&ub.s, s);
+ float32_input_flush(&uc.s, s);
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
goto soft;
}
@@ -2232,7 +2198,9 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
goto soft;
}
- float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
+ float64_input_flush(&ua.s, s);
+ float64_input_flush(&ub.s, s);
+ float64_input_flush(&uc.s, s);
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
goto soft;
}
@@ -3988,7 +3956,8 @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
goto soft;
}
- float32_input_flush2(&ua.s, &ub.s, s);
+ float32_input_flush(&ua.s, s);
+ float32_input_flush(&ub.s, s);
if (isgreaterequal(ua.h, ub.h)) {
if (isgreater(ua.h, ub.h)) {
return float_relation_greater;
@@ -4038,7 +4007,8 @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
goto soft;
}
- float64_input_flush2(&ua.s, &ub.s, s);
+ float64_input_flush(&ua.s, s);
+ float64_input_flush(&ub.s, s);
if (isgreaterequal(ua.h, ub.h)) {
if (isgreater(ua.h, ub.h)) {
return float_relation_greater;
@@ -4230,7 +4200,7 @@ float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
goto soft;
}
- float32_input_flush1(&ua.s, s);
+ float32_input_flush(&ua.s, s);
if (QEMU_HARDFLOAT_1F32_USE_FP) {
if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
fpclassify(ua.h) == FP_ZERO) ||
@@ -4257,7 +4227,7 @@ float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
goto soft;
}
- float64_input_flush1(&ua.s, s);
+ float64_input_flush(&ua.s, s);
if (QEMU_HARDFLOAT_1F64_USE_FP) {
if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
fpclassify(ua.h) == FP_ZERO) ||
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 72e2b24539..16d4425419 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -119,6 +119,7 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
int shift = frac_normalize(p);
p->cls = float_class_normal;
p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
+ float_raise(float_flag_inorm_denormal, status);
}
} else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
p->cls = float_class_normal;
--
2.25.1
Richard Henderson <richard.henderson@linaro.org> writes:
> Create a new exception flag for reporting input denormals that are not
> flushed to zero, they are normalized and treated as normal numbers.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/fpu/softfloat-types.h | 15 ++++---
> fpu/softfloat.c | 84 +++++++++++------------------------
> fpu/softfloat-parts.c.inc | 1 +
> 3 files changed, 36 insertions(+), 64 deletions(-)
>
> diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
> index e2d70ff556..174100e50e 100644
> --- a/include/fpu/softfloat-types.h
> +++ b/include/fpu/softfloat-types.h
> @@ -143,13 +143,14 @@ typedef enum __attribute__((__packed__)) {
> */
>
> enum {
> - float_flag_invalid = 1,
> - float_flag_divbyzero = 4,
> - float_flag_overflow = 8,
> - float_flag_underflow = 16,
> - float_flag_inexact = 32,
> - float_flag_iflush_denormal = 64,
> - float_flag_oflush_denormal = 128
> + float_flag_invalid = 0x0001,
> + float_flag_divbyzero = 0x0002,
> + float_flag_overflow = 0x0004,
> + float_flag_underflow = 0x0008,
> + float_flag_inexact = 0x0010,
> + float_flag_inorm_denormal = 0x0020, /* denormal input, normalized */
> + float_flag_iflush_denormal = 0x0040, /* denormal input, flushed to zero */
> + float_flag_oflush_denormal = 0x0080, /* denormal result, flushed to zero */
> };
>
> /*
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index cb077cf111..e54cdb274d 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -126,61 +126,23 @@ this code that are retained.
> * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
> * and the result is < the minimum normal.
> */
> -#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
> +
> +#define GEN_INPUT_FLUSH(name, soft_t) \
> static inline void name(soft_t *a, float_status *s) \
> { \
> if (unlikely(soft_t ## _is_denormal(*a))) { \
> - *a = soft_t ## _set_sign(soft_t ## _zero, \
> - soft_t ## _is_neg(*a)); \
> - float_raise(float_flag_iflush_denormal, s); \
> + if (s->flush_inputs_to_zero) { \
> + *a = soft_t ## _set_sign(0, soft_t ## _is_neg(*a)); \
> + float_raise(float_flag_iflush_denormal, s); \
> + } else { \
> + float_raise(float_flag_inorm_denormal, s); \
> + } \
> } \
> }
So I'm guessing Emilio had the original flush code split was to avoid
multiple checks against s->flush_inputs_to_zero in the code. The was
possibly a good reason, comparing the before/after of float32_mul:
Dump of assembler code for function float32_mul:
0x0000000000934240 <+0>: movzbl 0x1(%rdx),%eax
0x0000000000934244 <+4>: test $0x20,%al
0x0000000000934246 <+6>: je 0x9342b0 <float32_mul+112>
0x0000000000934248 <+8>: cmpb $0x0,(%rdx)
0x000000000093424b <+11>: jne 0x9342b0 <float32_mul+112>
0x000000000093424d <+13>: cmpb $0x0,0x5(%rdx)
0x0000000000934251 <+17>: jne 0x9342d0 <float32_mul+144>
0x0000000000934253 <+19>: mov %edi,%eax
0x0000000000934255 <+21>: shr $0x17,%eax
0x0000000000934258 <+24>: add $0x1,%eax
0x000000000093425b <+27>: test $0xfe,%al
0x000000000093425d <+29>: je 0x9342a8 <float32_mul+104>
0x000000000093425f <+31>: mov %esi,%eax
0x0000000000934261 <+33>: shr $0x17,%eax
0x0000000000934264 <+36>: add $0x1,%eax
0x0000000000934267 <+39>: test $0xfe,%al
0x0000000000934269 <+41>: jne 0x934273 <float32_mul+51>
0x000000000093426b <+43>: test $0x7fffffff,%esi
0x0000000000934271 <+49>: jne 0x9342b0 <float32_mul+112>
0x0000000000934273 <+51>: mov %esi,-0xc(%rsp)
0x0000000000934277 <+55>: movss -0xc(%rsp),%xmm0
0x000000000093427d <+61>: mov %edi,-0xc(%rsp)
0x0000000000934281 <+65>: movss -0xc(%rsp),%xmm2
0x0000000000934287 <+71>: mulss %xmm2,%xmm0
0x000000000093428b <+75>: movd %xmm0,%eax
0x000000000093428f <+79>: andps 0x3b805a(%rip),%xmm0 # 0xcec2f0
0x0000000000934296 <+86>: ucomiss 0x3b8047(%rip),%xmm0 # 0xcec2e4
0x000000000093429d <+93>: jbe 0x9342b8 <float32_mul+120>
0x000000000093429f <+95>: orb $0x8,0x1(%rdx)
0x00000000009342a3 <+99>: retq
0x00000000009342a4 <+100>: nopl 0x0(%rax)
0x00000000009342a8 <+104>: test $0x7fffffff,%edi
0x00000000009342ae <+110>: je 0x93425f <float32_mul+31>
0x00000000009342b0 <+112>: jmpq 0x9290d0 <soft_f32_mul>
0x00000000009342b5 <+117>: nopl (%rax)
0x00000000009342b8 <+120>: movss 0x3b8020(%rip),%xmm1 # 0xcec2e0
0x00000000009342c0 <+128>: comiss %xmm0,%xmm1
0x00000000009342c3 <+131>: jae 0x934320 <float32_mul+224>
0x00000000009342c5 <+133>: retq
0x00000000009342c6 <+134>: nopw %cs:0x0(%rax,%rax,1)
0x00000000009342d0 <+144>: test $0x7f800000,%edi
0x00000000009342d6 <+150>: jne 0x9342f0 <float32_mul+176>
0x00000000009342d8 <+152>: test $0x7fffffff,%edi
0x00000000009342de <+158>: je 0x9342f0 <float32_mul+176>
0x00000000009342e0 <+160>: or $0x40,%eax
0x00000000009342e3 <+163>: and $0x80000000,%edi
0x00000000009342e9 <+169>: mov %al,0x1(%rdx)
0x00000000009342ec <+172>: nopl 0x0(%rax)
0x00000000009342f0 <+176>: test $0x7f800000,%esi
0x00000000009342f6 <+182>: jne 0x934253 <float32_mul+19>
0x00000000009342fc <+188>: test $0x7fffffff,%esi
0x0000000000934302 <+194>: je 0x934253 <float32_mul+19>
0x0000000000934308 <+200>: and $0x80000000,%esi
0x000000000093430e <+206>: orb $0x40,0x1(%rdx)
0x0000000000934312 <+210>: jmpq 0x934253 <float32_mul+19>
0x0000000000934317 <+215>: nopw 0x0(%rax,%rax,1)
0x0000000000934320 <+224>: mov %edi,%ecx
0x0000000000934322 <+226>: or %esi,%ecx
0x0000000000934324 <+228>: and $0x7fffffff,%ecx
0x000000000093432a <+234>: jne 0x9342b0 <float32_mul+112>
0x000000000093432c <+236>: jmp 0x9342c5 <float32_mul+133>
End of assembler dump.
And after this change:
Dump of assembler code for function float32_mul:
0x0000000000895d60 <+0>: movzbl 0x1(%rdx),%eax
0x0000000000895d64 <+4>: test $0x10,%al
0x0000000000895d66 <+6>: je 0x895e30 <float32_mul+208>
0x0000000000895d6c <+12>: cmpb $0x0,(%rdx)
0x0000000000895d6f <+15>: jne 0x895e30 <float32_mul+208>
0x0000000000895d75 <+21>: test $0x7f800000,%edi
0x0000000000895d7b <+27>: jne 0x895da0 <float32_mul+64>
0x0000000000895d7d <+29>: test $0x7fffffff,%edi
0x0000000000895d83 <+35>: je 0x895da0 <float32_mul+64>
0x0000000000895d85 <+37>: cmpb $0x0,0x5(%rdx)
0x0000000000895d89 <+41>: je 0x895e60 <float32_mul+256>
0x0000000000895d8f <+47>: or $0x40,%eax
0x0000000000895d92 <+50>: and $0x80000000,%edi
0x0000000000895d98 <+56>: mov %al,0x1(%rdx)
0x0000000000895d9b <+59>: nopl 0x0(%rax,%rax,1)
0x0000000000895da0 <+64>: test $0x7f800000,%esi
0x0000000000895da6 <+70>: jne 0x895dd0 <float32_mul+112>
0x0000000000895da8 <+72>: test $0x7fffffff,%esi
0x0000000000895dae <+78>: je 0x895dd0 <float32_mul+112>
0x0000000000895db0 <+80>: cmpb $0x0,0x5(%rdx)
0x0000000000895db4 <+84>: movzbl 0x1(%rdx),%eax
0x0000000000895db8 <+88>: je 0x895e50 <float32_mul+240>
0x0000000000895dbe <+94>: or $0x40,%eax
0x0000000000895dc1 <+97>: and $0x80000000,%esi
0x0000000000895dc7 <+103>: mov %al,0x1(%rdx)
0x0000000000895dca <+106>: nopw 0x0(%rax,%rax,1)
0x0000000000895dd0 <+112>: mov %edi,%eax
0x0000000000895dd2 <+114>: shr $0x17,%eax
0x0000000000895dd5 <+117>: add $0x1,%eax
0x0000000000895dd8 <+120>: test $0xfe,%al
0x0000000000895dda <+122>: je 0x895e28 <float32_mul+200>
0x0000000000895ddc <+124>: mov %esi,%eax
0x0000000000895dde <+126>: shr $0x17,%eax
0x0000000000895de1 <+129>: add $0x1,%eax
0x0000000000895de4 <+132>: test $0xfe,%al
0x0000000000895de6 <+134>: jne 0x895df0 <float32_mul+144>
0x0000000000895de8 <+136>: test $0x7fffffff,%esi
0x0000000000895dee <+142>: jne 0x895e30 <float32_mul+208>
0x0000000000895df0 <+144>: mov %esi,-0xc(%rsp)
0x0000000000895df4 <+148>: movss -0xc(%rsp),%xmm0
0x0000000000895dfa <+154>: mov %edi,-0xc(%rsp)
0x0000000000895dfe <+158>: movss -0xc(%rsp),%xmm2
0x0000000000895e04 <+164>: mulss %xmm2,%xmm0
0x0000000000895e08 <+168>: movd %xmm0,%eax
0x0000000000895e0c <+172>: andps 0x46bb5d(%rip),%xmm0 # 0xd01970
0x0000000000895e13 <+179>: ucomiss 0x46bb4a(%rip),%xmm0 # 0xd01964
0x0000000000895e1a <+186>: jbe 0x895e38 <float32_mul+216>
0x0000000000895e1c <+188>: orb $0x4,0x1(%rdx)
0x0000000000895e20 <+192>: retq
0x0000000000895e21 <+193>: nopl 0x0(%rax)
0x0000000000895e28 <+200>: test $0x7fffffff,%edi
0x0000000000895e2e <+206>: je 0x895ddc <float32_mul+124>
0x0000000000895e30 <+208>: jmpq 0x88a8c0 <soft_f32_mul>
0x0000000000895e35 <+213>: nopl (%rax)
0x0000000000895e38 <+216>: movss 0x46bb20(%rip),%xmm1 # 0xd01960
0x0000000000895e40 <+224>: comiss %xmm0,%xmm1
0x0000000000895e43 <+227>: jae 0x895e70 <float32_mul+272>
0x0000000000895e45 <+229>: retq
0x0000000000895e46 <+230>: nopw %cs:0x0(%rax,%rax,1)
0x0000000000895e50 <+240>: or $0x20,%eax
0x0000000000895e53 <+243>: mov %al,0x1(%rdx)
0x0000000000895e56 <+246>: jmpq 0x895dd0 <float32_mul+112>
0x0000000000895e5b <+251>: nopl 0x0(%rax,%rax,1)
0x0000000000895e60 <+256>: or $0x20,%eax
0x0000000000895e63 <+259>: mov %al,0x1(%rdx)
0x0000000000895e66 <+262>: jmpq 0x895da0 <float32_mul+64>
0x0000000000895e6b <+267>: nopl 0x0(%rax,%rax,1)
0x0000000000895e70 <+272>: mov %esi,%ecx
0x0000000000895e72 <+274>: or %edi,%ecx
0x0000000000895e74 <+276>: and $0x7fffffff,%ecx
0x0000000000895e7a <+282>: jne 0x895e30 <float32_mul+208>
0x0000000000895e7c <+284>: jmp 0x895e45 <float32_mul+229>
End of assembler dump.
However I'm not sure how much of that increase is down to the change of
macro expansion and how much is due to the extra leg for the flushing.
Anyway other than that observation seems OK to me:
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
--
Alex Bennée
Alex Bennée <alex.bennee@linaro.org> writes: > Richard Henderson <richard.henderson@linaro.org> writes: > >> Create a new exception flag for reporting input denormals that are not >> flushed to zero, they are normalized and treated as normal numbers. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> <snip> > > Anyway other than that observation seems OK to me: > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> I of course meant: Reviewed-by: Alex Bennée <alex.bennee@linaro.org> -- Alex Bennée
On 6/7/21 8:35 AM, Alex Bennée wrote: > So I'm guessing Emilio had the original flush code split was to avoid > multiple checks against s->flush_inputs_to_zero in the code. The was > possibly a good reason, comparing the before/after of float32_mul: I assumed that the most important thing now is that we test floatN_is_denormal only once -- the test for flush_inputs_to_zero is fairly trivial. If you've got a better ordering of operations for this, do tell. r~
Richard Henderson <richard.henderson@linaro.org> writes: > On 6/7/21 8:35 AM, Alex Bennée wrote: >> So I'm guessing Emilio had the original flush code split was to avoid >> multiple checks against s->flush_inputs_to_zero in the code. The was >> possibly a good reason, comparing the before/after of float32_mul: > > I assumed that the most important thing now is that we test > floatN_is_denormal only once -- the test for flush_inputs_to_zero is > fairly trivial. > > If you've got a better ordering of operations for this, do tell. What I really want is to know which instructions translate into the if (s->flush_inputs_to_zero) and verifying that is only checked once. Maybe I'm just suspicious of compilers ability to optimise things away... -- Alex Bennée
On 6/7/21 10:19 AM, Alex Bennée wrote: >> If you've got a better ordering of operations for this, do tell. > > What I really want is to know which instructions translate into the if > (s->flush_inputs_to_zero) and verifying that is only checked once. Maybe > I'm just suspicious of compilers ability to optimise things away... > Dump of assembler code for function float32_mul: > 0x0000000000895d60 <+0>: movzbl 0x1(%rdx),%eax > 0x0000000000895d64 <+4>: test $0x10,%al > 0x0000000000895d66 <+6>: je 0x895e30 <float32_mul+208> s->float_exception_flags & float_flag_inexact > 0x0000000000895d6c <+12>: cmpb $0x0,(%rdx) > 0x0000000000895d6f <+15>: jne 0x895e30 <float32_mul+208> s->float_rounding_mode == float_round_nearest_even > 0x0000000000895d75 <+21>: test $0x7f800000,%edi > 0x0000000000895d7b <+27>: jne 0x895da0 <float32_mul+64> > 0x0000000000895d7d <+29>: test $0x7fffffff,%edi > 0x0000000000895d83 <+35>: je 0x895da0 <float32_mul+64> float32_is_denormal > 0x0000000000895d85 <+37>: cmpb $0x0,0x5(%rdx) > 0x0000000000895d89 <+41>: je 0x895e60 <float32_mul+256> s->flush_inputs_to_zero > 0x0000000000895d8f <+47>: or $0x40,%eax > 0x0000000000895d92 <+50>: and $0x80000000,%edi > 0x0000000000895d98 <+56>: mov %al,0x1(%rdx) flush-to-zero and set iflush_denormal > 0x0000000000895da0 <+64>: test $0x7f800000,%esi > 0x0000000000895da6 <+70>: jne 0x895dd0 <float32_mul+112> > 0x0000000000895da8 <+72>: test $0x7fffffff,%esi > 0x0000000000895dae <+78>: je 0x895dd0 <float32_mul+112> float32_is_denormal (second operand) > 0x0000000000895db0 <+80>: cmpb $0x0,0x5(%rdx) > 0x0000000000895db4 <+84>: movzbl 0x1(%rdx),%eax > 0x0000000000895db8 <+88>: je 0x895e50 <float32_mul+240> > 0x0000000000895dbe <+94>: or $0x40,%eax > 0x0000000000895dc1 <+97>: and $0x80000000,%esi s->flush_inputs_to_zero, flush-to-zero, set iflush_denormal. ... > 0x0000000000895e50 <+240>: or $0x20,%eax > 0x0000000000895e53 <+243>: mov %al,0x1(%rdx) > 0x0000000000895e56 <+246>: jmpq 0x895dd0 <float32_mul+112> set inorm_denormal (second operand) > 0x0000000000895e60 <+256>: or $0x20,%eax > 0x0000000000895e63 <+259>: mov %al,0x1(%rdx) > 0x0000000000895e66 <+262>: jmpq 0x895da0 <float32_mul+64> set inorm_denormal (first operand) There do seem to be 3 reads/writes to exception_flags for float_raise. r~
I'm probably missing something, but why do we need both "float_flag_inorm_denormal" and "float_flag_iflush_denormal"?
Couldn't the code that sets these flags set just a single flag for all denormal inputs and the code that checks these flags check that single flag
combined with the "flush_inputs_to_zero" flag to accomplish what the two separate "input denormal" flags do?
Michael
-----Original Message-----
From: Richard Henderson <richard.henderson@linaro.org>
Sent: Wednesday, May 26, 2021 9:14 PM
To: qemu-devel@nongnu.org
Cc: Michael Morrell <mmorrell@tachyum.com>; alex.bennee@linaro.org
Subject: [PATCH 03/11] softfloat: Introduce float_flag_inorm_denormal
Create a new exception flag for reporting input denormals that are not flushed to zero, they are normalized and treated as normal numbers.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-types.h | 15 ++++---
fpu/softfloat.c | 84 +++++++++++------------------------
fpu/softfloat-parts.c.inc | 1 +
3 files changed, 36 insertions(+), 64 deletions(-)
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index e2d70ff556..174100e50e 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -143,13 +143,14 @@ typedef enum __attribute__((__packed__)) {
*/
enum {
- float_flag_invalid = 1,
- float_flag_divbyzero = 4,
- float_flag_overflow = 8,
- float_flag_underflow = 16,
- float_flag_inexact = 32,
- float_flag_iflush_denormal = 64,
- float_flag_oflush_denormal = 128
+ float_flag_invalid = 0x0001,
+ float_flag_divbyzero = 0x0002,
+ float_flag_overflow = 0x0004,
+ float_flag_underflow = 0x0008,
+ float_flag_inexact = 0x0010,
+ float_flag_inorm_denormal = 0x0020, /* denormal input, normalized */
+ float_flag_iflush_denormal = 0x0040, /* denormal input, flushed to zero */
+ float_flag_oflush_denormal = 0x0080, /* denormal result, flushed
+ to zero */
};
/*
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index cb077cf111..e54cdb274d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -126,61 +126,23 @@ this code that are retained.
* denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
* and the result is < the minimum normal.
*/
-#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
+
+#define GEN_INPUT_FLUSH(name, soft_t) \
static inline void name(soft_t *a, float_status *s) \
{ \
if (unlikely(soft_t ## _is_denormal(*a))) { \
- *a = soft_t ## _set_sign(soft_t ## _zero, \
- soft_t ## _is_neg(*a)); \
- float_raise(float_flag_iflush_denormal, s); \
+ if (s->flush_inputs_to_zero) { \
+ *a = soft_t ## _set_sign(0, soft_t ## _is_neg(*a)); \
+ float_raise(float_flag_iflush_denormal, s); \
+ } else { \
+ float_raise(float_flag_inorm_denormal, s); \
+ } \
} \
}
-GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32) -GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64) -#undef GEN_INPUT_FLUSH__NOCHECK
-
-#define GEN_INPUT_FLUSH1(name, soft_t) \
- static inline void name(soft_t *a, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- }
-
-GEN_INPUT_FLUSH1(float32_input_flush1, float32) -GEN_INPUT_FLUSH1(float64_input_flush1, float64) -#undef GEN_INPUT_FLUSH1
-
-#define GEN_INPUT_FLUSH2(name, soft_t) \
- static inline void name(soft_t *a, soft_t *b, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- soft_t ## _input_flush__nocheck(b, s); \
- }
-
-GEN_INPUT_FLUSH2(float32_input_flush2, float32) -GEN_INPUT_FLUSH2(float64_input_flush2, float64) -#undef GEN_INPUT_FLUSH2
-
-#define GEN_INPUT_FLUSH3(name, soft_t) \
- static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- soft_t ## _input_flush__nocheck(b, s); \
- soft_t ## _input_flush__nocheck(c, s); \
- }
-
-GEN_INPUT_FLUSH3(float32_input_flush3, float32) -GEN_INPUT_FLUSH3(float64_input_flush3, float64) -#undef GEN_INPUT_FLUSH3
+GEN_INPUT_FLUSH(float32_input_flush, float32)
+GEN_INPUT_FLUSH(float64_input_flush, float64) #undef GEN_INPUT_FLUSH
/*
* Choose whether to use fpclassify or float32/64_* primitives in the generated @@ -353,7 +315,8 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
goto soft;
}
- float32_input_flush2(&ua.s, &ub.s, s);
+ float32_input_flush(&ua.s, s);
+ float32_input_flush(&ub.s, s);
if (unlikely(!pre(ua, ub))) {
goto soft;
}
@@ -384,7 +347,8 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
goto soft;
}
- float64_input_flush2(&ua.s, &ub.s, s);
+ float64_input_flush(&ua.s, s);
+ float64_input_flush(&ub.s, s);
if (unlikely(!pre(ua, ub))) {
goto soft;
}
@@ -2161,7 +2125,9 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
goto soft;
}
- float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
+ float32_input_flush(&ua.s, s);
+ float32_input_flush(&ub.s, s);
+ float32_input_flush(&uc.s, s);
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
goto soft;
}
@@ -2232,7 +2198,9 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
goto soft;
}
- float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
+ float64_input_flush(&ua.s, s);
+ float64_input_flush(&ub.s, s);
+ float64_input_flush(&uc.s, s);
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
goto soft;
}
@@ -3988,7 +3956,8 @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
goto soft;
}
- float32_input_flush2(&ua.s, &ub.s, s);
+ float32_input_flush(&ua.s, s);
+ float32_input_flush(&ub.s, s);
if (isgreaterequal(ua.h, ub.h)) {
if (isgreater(ua.h, ub.h)) {
return float_relation_greater; @@ -4038,7 +4007,8 @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
goto soft;
}
- float64_input_flush2(&ua.s, &ub.s, s);
+ float64_input_flush(&ua.s, s);
+ float64_input_flush(&ub.s, s);
if (isgreaterequal(ua.h, ub.h)) {
if (isgreater(ua.h, ub.h)) {
return float_relation_greater; @@ -4230,7 +4200,7 @@ float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
goto soft;
}
- float32_input_flush1(&ua.s, s);
+ float32_input_flush(&ua.s, s);
if (QEMU_HARDFLOAT_1F32_USE_FP) {
if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
fpclassify(ua.h) == FP_ZERO) || @@ -4257,7 +4227,7 @@ float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
goto soft;
}
- float64_input_flush1(&ua.s, s);
+ float64_input_flush(&ua.s, s);
if (QEMU_HARDFLOAT_1F64_USE_FP) {
if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
fpclassify(ua.h) == FP_ZERO) || diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc index 72e2b24539..16d4425419 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -119,6 +119,7 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
int shift = frac_normalize(p);
p->cls = float_class_normal;
p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
+ float_raise(float_flag_inorm_denormal, status);
}
} else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
p->cls = float_class_normal;
--
2.25.1
On 5/28/21 10:41 AM, Michael Morrell wrote: > I'm probably missing something, but why do we need both "float_flag_inorm_denormal" and "float_flag_iflush_denormal"? > > Couldn't the code that sets these flags set just a single flag for all denormal inputs and the code that checks these flags check that single flag > combined with the "flush_inputs_to_zero" flag to accomplish what the two separate "input denormal" flags do? The thing that you're missing is that many guests leave the accumulated softfloat exceptions in the float_status structure until the guest FPSCR register is read. Unless the guest needs to raise an exception immediately, there's no reason to do otherwise. With this setup, you have no temporal connection between "any denormal" and "flush-to-zero is set", thus two flags. r~
Just curious. What's the expected timeline to get these denormal patches in the tree? -----Original Message----- From: Richard Henderson <richard.henderson@linaro.org> Sent: Saturday, May 29, 2021 8:21 AM To: Michael Morrell <mmorrell@tachyum.com>; qemu-devel@nongnu.org Cc: alex.bennee@linaro.org Subject: Re: [PATCH 03/11] softfloat: Introduce float_flag_inorm_denormal On 5/28/21 10:41 AM, Michael Morrell wrote: > I'm probably missing something, but why do we need both "float_flag_inorm_denormal" and "float_flag_iflush_denormal"? > > Couldn't the code that sets these flags set just a single flag for all > denormal inputs and the code that checks these flags check that single flag combined with the "flush_inputs_to_zero" flag to accomplish what the two separate "input denormal" flags do? The thing that you're missing is that many guests leave the accumulated softfloat exceptions in the float_status structure until the guest FPSCR register is read. Unless the guest needs to raise an exception immediately, there's no reason to do otherwise. With this setup, you have no temporal connection between "any denormal" and "flush-to-zero is set", thus two flags. r~
On 7/14/21 9:44 AM, Michael Morrell wrote: > Just curious. What's the expected timeline to get these denormal patches in the tree? Next development cycle, at minimum. I need to fix the problems vs NaNs that you identified. r~
OK, thanks for the update. I also appreciate you looking at the NaN issue. Michael -----Original Message----- From: Richard Henderson <richard.henderson@linaro.org> Sent: Wednesday, July 14, 2021 9:57 AM To: Michael Morrell <mmorrell@tachyum.com>; qemu-devel@nongnu.org Subject: Re: [PATCH 03/11] softfloat: Introduce float_flag_inorm_denormal On 7/14/21 9:44 AM, Michael Morrell wrote: > Just curious. What's the expected timeline to get these denormal patches in the tree? Next development cycle, at minimum. I need to fix the problems vs NaNs that you identified. r~
Richard, It's been 6 months so I thought I'd check in again. Do you have an estimate of when this will go in? Michael -----Original Message----- From: Michael Morrell Sent: Wednesday, July 14, 2021 10:50 AM To: 'Richard Henderson' <richard.henderson@linaro.org>; qemu-devel@nongnu.org Subject: RE: [PATCH 03/11] softfloat: Introduce float_flag_inorm_denormal OK, thanks for the update. I also appreciate you looking at the NaN issue. Michael -----Original Message----- From: Richard Henderson <richard.henderson@linaro.org> Sent: Wednesday, July 14, 2021 9:57 AM To: Michael Morrell <mmorrell@tachyum.com>; qemu-devel@nongnu.org Subject: Re: [PATCH 03/11] softfloat: Introduce float_flag_inorm_denormal On 7/14/21 9:44 AM, Michael Morrell wrote: > Just curious. What's the expected timeline to get these denormal patches in the tree? Next development cycle, at minimum. I need to fix the problems vs NaNs that you identified. r~
© 2016 - 2026 Red Hat, Inc.