target/s390x/fpu_helper.c | 51 +++++++++++++++++++++++++++++++++++++++++++---- target/s390x/translate.c | 39 +++++++++++------------------------- 2 files changed, 59 insertions(+), 31 deletions(-)
The FP-to-integer conversion instructions need to set CC 3 whenever
a "special case" occurs; this is the case whenever the instruction
also signals the IEEE invalid exception. (See e.g. figure 19-18
in the Principles of Operation.)
However, qemu currently will set CC 3 only in the case where the
input was a NaN. This is indeed one of the special cases, but
there are others, most notably the case where the input is out
of range of the target data type.
This patch fixes the problem by switching these instructions to
the "static" CC method and computing the correct result directly
in the helper. (It cannot be re-computed later as the information
about the invalid exception is no longer available.)
This fixes a bug observed when running the wasmtime test suite
under the s390x-linux-user target.
Signed-off-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
---
target/s390x/fpu_helper.c | 51 +++++++++++++++++++++++++++++++++++++++++++----
target/s390x/translate.c | 39 +++++++++++-------------------------
2 files changed, 59 insertions(+), 31 deletions(-)
diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index 13af158..50250cf 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -168,6 +168,34 @@ uint32_t set_cc_nz_f128(float128 v)
}
}
+/* condition codes for FP to integer conversion ops */
+static uint32_t set_cc_conv_f32(float32 v, float_status *stat)
+{
+ if (stat->float_exception_flags & float_flag_invalid) {
+ return 3;
+ } else {
+ return set_cc_nz_f32(v);
+ }
+}
+
+static uint32_t set_cc_conv_f64(float64 v, float_status *stat)
+{
+ if (stat->float_exception_flags & float_flag_invalid) {
+ return 3;
+ } else {
+ return set_cc_nz_f64(v);
+ }
+}
+
+static uint32_t set_cc_conv_f128(float128 v, float_status *stat)
+{
+ if (stat->float_exception_flags & float_flag_invalid) {
+ return 3;
+ } else {
+ return set_cc_nz_f128(v);
+ }
+}
+
static inline uint8_t round_from_m34(uint32_t m34)
{
return extract32(m34, 0, 4);
@@ -506,6 +534,7 @@ uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
int64_t ret = float32_to_int64(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f32(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -520,6 +549,7 @@ uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
int64_t ret = float64_to_int64(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f64(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -535,6 +565,7 @@ uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
float128 v2 = make_float128(h, l);
int64_t ret = float128_to_int64(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f128(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -549,6 +580,7 @@ uint64_t HELPER(cfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
int32_t ret = float32_to_int32(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f32(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -563,6 +595,7 @@ uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
int32_t ret = float64_to_int32(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f64(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -578,6 +611,7 @@ uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
float128 v2 = make_float128(h, l);
int32_t ret = float128_to_int32(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f128(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -592,6 +626,8 @@ uint64_t HELPER(clgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
uint64_t ret = float32_to_uint64(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f32(v2, &env->fpu_status);
+
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
if (float32_is_any_nan(v2)) {
@@ -605,6 +641,7 @@ uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
uint64_t ret = float64_to_uint64(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f64(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -618,11 +655,13 @@ uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
- uint64_t ret = float128_to_uint64(make_float128(h, l), &env->fpu_status);
+ float128 v2 = make_float128(h, l);
+ uint64_t ret = float128_to_uint64(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f128(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
- if (float128_is_any_nan(make_float128(h, l))) {
+ if (float128_is_any_nan(v2)) {
return 0;
}
return ret;
@@ -633,6 +672,7 @@ uint64_t HELPER(clfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
uint32_t ret = float32_to_uint32(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f32(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -647,6 +687,7 @@ uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
uint32_t ret = float64_to_uint32(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f64(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
@@ -660,11 +701,13 @@ uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
{
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
- uint32_t ret = float128_to_uint32(make_float128(h, l), &env->fpu_status);
+ float128 v2 = make_float128(h, l);
+ uint32_t ret = float128_to_uint32(v2, &env->fpu_status);
+ env->cc_op = set_cc_conv_f128(v2, &env->fpu_status);
s390_restore_bfp_rounding_mode(env, old_mode);
handle_exceptions(env, xxc_from_m34(m34), GETPC());
- if (float128_is_any_nan(make_float128(h, l))) {
+ if (float128_is_any_nan(v2)) {
return 0;
}
return ret;
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index e243624..53129fb 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -563,21 +563,6 @@ static void set_cc_nz_u64(DisasContext *s, TCGv_i64 val)
gen_op_update1_cc_i64(s, CC_OP_NZ, val);
}
-static void gen_set_cc_nz_f32(DisasContext *s, TCGv_i64 val)
-{
- gen_op_update1_cc_i64(s, CC_OP_NZ_F32, val);
-}
-
-static void gen_set_cc_nz_f64(DisasContext *s, TCGv_i64 val)
-{
- gen_op_update1_cc_i64(s, CC_OP_NZ_F64, val);
-}
-
-static void gen_set_cc_nz_f128(DisasContext *s, TCGv_i64 vh, TCGv_i64 vl)
-{
- gen_op_update2_cc_i64(s, CC_OP_NZ_F128, vh, vl);
-}
-
/* CC value is in env->cc_op */
static void set_cc_static(DisasContext *s)
{
@@ -1836,7 +1821,7 @@ static DisasJumpType op_cfeb(DisasContext *s, DisasOps *o)
}
gen_helper_cfeb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f32(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1849,7 +1834,7 @@ static DisasJumpType op_cfdb(DisasContext *s, DisasOps *o)
}
gen_helper_cfdb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f64(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1862,7 +1847,7 @@ static DisasJumpType op_cfxb(DisasContext *s, DisasOps *o)
}
gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f128(s, o->in1, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1875,7 +1860,7 @@ static DisasJumpType op_cgeb(DisasContext *s, DisasOps *o)
}
gen_helper_cgeb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f32(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1888,7 +1873,7 @@ static DisasJumpType op_cgdb(DisasContext *s, DisasOps *o)
}
gen_helper_cgdb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f64(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1901,7 +1886,7 @@ static DisasJumpType op_cgxb(DisasContext *s, DisasOps *o)
}
gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f128(s, o->in1, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1914,7 +1899,7 @@ static DisasJumpType op_clfeb(DisasContext *s, DisasOps *o)
}
gen_helper_clfeb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f32(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1927,7 +1912,7 @@ static DisasJumpType op_clfdb(DisasContext *s, DisasOps *o)
}
gen_helper_clfdb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f64(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1940,7 +1925,7 @@ static DisasJumpType op_clfxb(DisasContext *s, DisasOps *o)
}
gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f128(s, o->in1, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1953,7 +1938,7 @@ static DisasJumpType op_clgeb(DisasContext *s, DisasOps *o)
}
gen_helper_clgeb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f32(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1966,7 +1951,7 @@ static DisasJumpType op_clgdb(DisasContext *s, DisasOps *o)
}
gen_helper_clgdb(o->out, cpu_env, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f64(s, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
@@ -1979,7 +1964,7 @@ static DisasJumpType op_clgxb(DisasContext *s, DisasOps *o)
}
gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m34);
tcg_temp_free_i32(m34);
- gen_set_cc_nz_f128(s, o->in1, o->in2);
+ set_cc_static(s);
return DISAS_NEXT;
}
--
1.8.3.1
--
Dr. Ulrich Weigand
GNU/Linux compilers and toolchain
Ulrich.Weigand@de.ibm.com
On 6/23/21 7:50 AM, Ulrich Weigand wrote: > @@ -506,6 +534,7 @@ uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) > { > int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); > int64_t ret = float32_to_int64(v2, &env->fpu_status); > + env->cc_op = set_cc_conv_f32(v2, &env->fpu_status); ... > @@ -1875,7 +1860,7 @@ static DisasJumpType op_cgeb(DisasContext *s, DisasOps *o) > } > gen_helper_cgeb(o->out, cpu_env, o->in2, m34); > tcg_temp_free_i32(m34); > - gen_set_cc_nz_f32(s, o->in2); > + set_cc_static(s); > return DISAS_NEXT; ... > helper.h:DEF_HELPER_FLAGS_3(clgdb, TCG_CALL_NO_WG, i64, env, i64, i32) This won't work reliably. You're writing to a tcg global inside of a function that says that it won't. It's probably time to take care of > /* > * FIXME: > * 1. Right now, all inexact conditions are inidicated as > * "truncated" (0) and never as "incremented" (1) in the DXC. > * 2. Only traps due to invalid/divbyzero are suppressing. Other traps > * are completing, meaning the target register has to be written! > * This, however will mean that we have to write the register before > * triggering the trap - impossible right now. > */ point 2, by splitting the fpu helpers. In the first part, take care of the optimization and suppressed traps, and return the register value. In the second part, take care of FPC write-back, completing traps, and return any cc value. Which you can then assign, properly, to the cc_op tcg global. BTW, we can now improve fpu performance by keeping masked exceptions in the float_status. Or at least inexact. Once that's set, we allow softfloat.c to use host floating-point insns under certain conditions. I'm sure this code pre-dates that, so it made sense to clear it all out at the time. r~
On Fri, Jun 25, 2021 at 06:19:48PM -0700, Richard Henderson wrote: > On 6/23/21 7:50 AM, Ulrich Weigand wrote: > >@@ -506,6 +534,7 @@ uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) > > { > > int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); > > int64_t ret = float32_to_int64(v2, &env->fpu_status); > >+ env->cc_op = set_cc_conv_f32(v2, &env->fpu_status); > ... > > >@@ -1875,7 +1860,7 @@ static DisasJumpType op_cgeb(DisasContext *s, DisasOps *o) > > } > > gen_helper_cgeb(o->out, cpu_env, o->in2, m34); > > tcg_temp_free_i32(m34); > >- gen_set_cc_nz_f32(s, o->in2); > >+ set_cc_static(s); > > return DISAS_NEXT; > > ... > > >helper.h:DEF_HELPER_FLAGS_3(clgdb, TCG_CALL_NO_WG, i64, env, i64, i32) > > This won't work reliably. You're writing to a tcg global inside of > a function that says that it won't. I missed that, sorry. That problem can be fixed by changing the above line to something like: DEF_HELPER_3(clgdb, i64, env, i64, i32) right? > It's probably time to take care of > > > /* > > * FIXME: > > * 1. Right now, all inexact conditions are inidicated as > > * "truncated" (0) and never as "incremented" (1) in the DXC. > > * 2. Only traps due to invalid/divbyzero are suppressing. Other traps > > * are completing, meaning the target register has to be written! > > * This, however will mean that we have to write the register before > > * triggering the trap - impossible right now. > > */ > > point 2, by splitting the fpu helpers. In the first part, take care > of the optimization and suppressed traps, and return the register > value. In the second part, take care of FPC write-back, completing > traps, and return any cc value. Which you can then assign, > properly, to the cc_op tcg global. I'm not sure I see how this is related to the CC problem. Note that the difference between suppressing and completing traps applies to both the target register and the CC - if a suppressing trap is triggered, both target register and CC remain unchanged, otherwise they are changed (and both changes are visible to the trap handler). In any case, the current implementation already has two helpers, and I initially tried to keep that, by using a different second part to correctly compute CC. But this ran into the problem that I didn't see any way to detect the fact that the conversion operation had run into one of the special cases in the second helper, without re-doing the whole conversion a second time. Is there any way to pass information between the two helpers (without running again into the same qemu global state updating problem)? Bye, Ulrich -- Dr. Ulrich Weigand GNU/Linux compilers and toolchain Ulrich.Weigand@de.ibm.com
On 6/28/21 5:58 AM, Ulrich Weigand wrote: >>> helper.h:DEF_HELPER_FLAGS_3(clgdb, TCG_CALL_NO_WG, i64, env, i64, i32) >> >> This won't work reliably. You're writing to a tcg global inside of >> a function that says that it won't. > > I missed that, sorry. That problem can be fixed by changing the above > line to something like: > DEF_HELPER_3(clgdb, i64, env, i64, i32) > right? Yes. > In any case, the current implementation already has two helpers, and > I initially tried to keep that, by using a different second part to > correctly compute CC. But this ran into the problem that I didn't > see any way to detect the fact that the conversion operation had run > into one of the special cases in the second helper, without re-doing > the whole conversion a second time. Is there any way to pass > information between the two helpers (without running again into the > same qemu global state updating problem)? Don't clear out env->fpu_status.float_exception_flags in handle_exceptions. Wait until we're actually done with the data. r~
On Mon, Jun 28, 2021 at 06:26:52AM -0700, Richard Henderson wrote: > On 6/28/21 5:58 AM, Ulrich Weigand wrote: > >>>helper.h:DEF_HELPER_FLAGS_3(clgdb, TCG_CALL_NO_WG, i64, env, i64, i32) > >> > >>This won't work reliably. You're writing to a tcg global inside of > >>a function that says that it won't. > > > >I missed that, sorry. That problem can be fixed by changing the above > >line to something like: > >DEF_HELPER_3(clgdb, i64, env, i64, i32) > >right? > > Yes. OK, I'll send a v2 including that change shortly; maybe that is an acceptable fix for the immediate bug, at least for now. > >In any case, the current implementation already has two helpers, and > >I initially tried to keep that, by using a different second part to > >correctly compute CC. But this ran into the problem that I didn't > >see any way to detect the fact that the conversion operation had run > >into one of the special cases in the second helper, without re-doing > >the whole conversion a second time. Is there any way to pass > >information between the two helpers (without running again into the > >same qemu global state updating problem)? > > Don't clear out env->fpu_status.float_exception_flags in > handle_exceptions. Wait until we're actually done with the data. I don't really know much about qemu internals, but this is really confusing me, sorry. Aren't env->fpu_status and env->cc_op two elements of the same global state? Why it is OK to use one of these fields to pass information to the next helper, but not the other? I guess I must be missing something here ... Bye, Ulrich -- Dr. Ulrich Weigand GNU/Linux compilers and toolchain Ulrich.Weigand@de.ibm.com
On 6/28/21 9:32 AM, Ulrich Weigand wrote: >> Don't clear out env->fpu_status.float_exception_flags in >> handle_exceptions. Wait until we're actually done with the data. > > I don't really know much about qemu internals, but this is really > confusing me, sorry. Aren't env->fpu_status and env->cc_op two > elements of the same global state? Why it is OK to use one of > these fields to pass information to the next helper, but not the > other? I guess I must be missing something here ... One of them has cc_op = tcg_global_mem_new_i32(cpu_env, offsetof(CPUS390XState, cc_op), "cc_op"); which makes it a TCG Global, which may not be modified at will, and one of them does not. r~
On Mon, Jun 28, 2021 at 10:49:52AM -0700, Richard Henderson wrote: > On 6/28/21 9:32 AM, Ulrich Weigand wrote: > >>Don't clear out env->fpu_status.float_exception_flags in > >>handle_exceptions. Wait until we're actually done with the data. > > > >I don't really know much about qemu internals, but this is really > >confusing me, sorry. Aren't env->fpu_status and env->cc_op two > >elements of the same global state? Why it is OK to use one of > >these fields to pass information to the next helper, but not the > >other? I guess I must be missing something here ... > > One of them has > > cc_op = tcg_global_mem_new_i32(cpu_env, offsetof(CPUS390XState, cc_op), > "cc_op"); > > which makes it a TCG Global, which may not be modified at will, and one of them does not. Got it, thanks! Bye, Ulrich -- Dr. Ulrich Weigand GNU/Linux compilers and toolchain Ulrich.Weigand@de.ibm.com
© 2016 - 2024 Red Hat, Inc.