[PATCH v2 3/4] target/s390x: Implement DIVIDE TO INTEGER

Ilya Leoshkevich posted 4 patches 1 week, 5 days ago
Maintainers: Aurelien Jarno <aurelien@aurel32.net>, Peter Maydell <peter.maydell@linaro.org>, "Alex Bennée" <alex.bennee@linaro.org>, Richard Henderson <richard.henderson@linaro.org>, Ilya Leoshkevich <iii@linux.ibm.com>, David Hildenbrand <david@kernel.org>, Thomas Huth <thuth@redhat.com>
There is a newer version of this series
[PATCH v2 3/4] target/s390x: Implement DIVIDE TO INTEGER
Posted by Ilya Leoshkevich 1 week, 5 days ago
DIVIDE TO INTEGER computes floating point remainder and is used by
LuaJIT, so add it to QEMU.

The instruction comes in two flavors: for floats and doubles, which are
very similar. Since it's also quite complex, copy-pasting the
implementation would result in barely maintainable code. Mitigate that
using macros. An alternative would be an .inc file, but this looks like
overkill.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 fpu/softfloat.c                  | 158 +++++++++++++++++++++++++++++++
 include/fpu/softfloat.h          |  11 +++
 target/s390x/helper.h            |   1 +
 target/s390x/tcg/fpu_helper.c    |  56 +++++++++++
 target/s390x/tcg/insn-data.h.inc |   5 +-
 target/s390x/tcg/translate.c     |  26 +++++
 6 files changed, 256 insertions(+), 1 deletion(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 8094358c2e4..178ea262057 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -5361,6 +5361,164 @@ floatx80 floatx80_round(floatx80 a, float_status *status)
     return floatx80_round_pack_canonical(&p, status);
 }
 
+static void parts_s390_precision_round_normal(FloatParts128 *p,
+                                              const FloatFmt *fmt)
+{
+    /* Use precision of the target format, but unbounded exponent range */
+    p->frac_hi &= ((1ULL << (fmt->frac_size + 1)) - 1) <<
+                  (63 - fmt->frac_size);
+    p->frac_lo = 0;
+}
+
+static void parts_s390_divide_to_integer(FloatParts64 *a, FloatParts64 *b,
+                                         int final_quotient_rounding_mode,
+                                         bool mask_underflow, bool mask_inexact,
+                                         const FloatFmt *fmt,
+                                         FloatParts64 *r, FloatParts64 *n,
+                                         uint32_t *cc, int *dxc,
+                                         float_status *status)
+{
+    /* POp table "Results: DIVIDE TO INTEGER (Part 1 of 2)" */
+    if ((float_cmask(a->cls) | float_cmask(b->cls)) & float_cmask_anynan) {
+        *r = *parts_pick_nan(a, b, status);
+        *n = *r;
+        *cc = 1;
+    } else if (a->cls == float_class_inf || b->cls == float_class_zero) {
+        parts_default_nan(r, status);
+        *n = *r;
+        *cc = 1;
+        status->float_exception_flags |= float_flag_invalid;
+    } else if (b->cls == float_class_inf) {
+        *r = *a;
+        n->cls = float_class_zero;
+        n->sign = a->sign ^ b->sign;
+        *cc = 0;
+    } else {
+        FloatParts128 a128, b128, *m128, m128_buf, n128, *q128, q128_buf,
+                      r128, *r128_precise;
+        int float_exception_flags = 0;
+        bool is_q128_smallish;
+        uint32_t r_flags;
+        int saved_flags;
+
+        /* Compute precise quotient */
+        parts_float_to_float_widen(&a128, a, status);
+        parts_float_to_float_widen(&b128, b, status);
+        q128_buf = a128;
+        q128 = parts_div(&q128_buf, &b128, status);
+
+        /* Final or partial case? */
+        is_q128_smallish = q128->exp < (fmt->frac_size + 1);
+
+        /*
+         * Final quotient is rounded using final-quotient-rounding method, and
+         * partial quotient is rounded toward zero.
+         *
+         * Rounding of partial quotient may be inexact. This is the whole point
+         * of distinguishing partial quotients, so ignore the exception.
+         */
+        n128 = *q128;
+        saved_flags = status->float_exception_flags;
+        parts_round_to_int(&n128,
+                           is_q128_smallish ? final_quotient_rounding_mode :
+                                              float_round_to_zero,
+                           0, status, &float128_params);
+        float_exception_flags = saved_flags;
+        parts_s390_precision_round_normal(&n128, fmt);
+
+        /* Compute precise remainder */
+        m128_buf = b128;
+        m128 = parts_mul(&m128_buf, &n128, status);
+        m128->sign = !m128->sign;
+        status->float_exception_flags = 0;
+        r128_precise = parts_addsub(m128, &a128, status, false);
+
+        /* Round remainder to the target format */
+        parts_float_to_float_narrow(r, r128_precise, status);
+        parts_uncanon(r, status, fmt);
+        r->frac &= (1ULL << fmt->frac_size) - 1;
+        parts_canonicalize(r, status, fmt);
+        parts_float_to_float_widen(&r128, r, status);
+        r_flags = status->float_exception_flags;
+
+        /* POp table "Results: DIVIDE TO INTEGER (Part 2 of 2)" */
+        if (is_q128_smallish) {
+            if (r128.cls != float_class_zero) {
+                if (r128.exp < 2 - (1 << (fmt->exp_size - 1))) {
+                    if (mask_underflow) {
+                        float_exception_flags |= float_flag_underflow;
+                        *dxc = 0x10;
+                        r128.exp += fmt->exp_re_bias;
+                    }
+                } else if (r_flags & float_flag_inexact) {
+                    float_exception_flags |= float_flag_inexact;
+                    if (mask_inexact) {
+                        bool saved_r128_sign, saved_r128_precise_sign;
+
+                        /*
+                         * Check whether remainder was truncated (rounded
+                         * toward zero) or incremented.
+                         */
+                        saved_r128_sign = r128.sign;
+                        saved_r128_precise_sign = r128_precise->sign;
+                        r128.sign = false;
+                        r128_precise->sign = false;
+                        if (parts_compare(&r128, r128_precise, status, true) <
+                            float_relation_equal) {
+                            *dxc = 0x8;
+                        } else {
+                            *dxc = 0xc;
+                        }
+                        r128.sign = saved_r128_sign;
+                        r128_precise->sign = saved_r128_precise_sign;
+                    }
+                }
+            }
+            *cc = 0;
+        } else if (n128.exp > (1 << (fmt->exp_size - 1)) - 1) {
+            n128.exp -= fmt->exp_re_bias;
+            *cc = r128.cls == float_class_zero ? 1 : 3;
+        } else {
+            *cc = r128.cls == float_class_zero ? 0 : 2;
+        }
+
+        /* Adjust signs of zero results */
+        parts_float_to_float_narrow(r, &r128, status);
+        if (r->cls == float_class_zero) {
+            r->sign = a->sign;
+        }
+        parts_float_to_float_narrow(n, &n128, status);
+        if (n->cls == float_class_zero) {
+            n->sign = a->sign ^ b->sign;
+        }
+
+        status->float_exception_flags = float_exception_flags;
+    }
+}
+
+#define DEFINE_S390_DIVIDE_TO_INTEGER(floatN)                                  \
+void floatN ## _s390_divide_to_integer(floatN a, floatN b,                     \
+                                       int final_quotient_rounding_mode,       \
+                                       bool mask_underflow, bool mask_inexact, \
+                                       floatN *r, floatN *n,                   \
+                                       uint32_t *cc, int *dxc,                 \
+                                       float_status *status)                   \
+{                                                                              \
+    FloatParts64 pa, pb, pr, pn;                                               \
+                                                                               \
+    floatN ## _unpack_canonical(&pa, a, status);                               \
+    floatN ## _unpack_canonical(&pb, b, status);                               \
+    parts_s390_divide_to_integer(&pa, &pb, final_quotient_rounding_mode,       \
+                                 mask_underflow, mask_inexact,                 \
+                                 &floatN ## _params,                           \
+                                 &pr, &pn, cc, dxc, status);                   \
+    *r = floatN ## _round_pack_canonical(&pr, status);                         \
+    *n = floatN ## _round_pack_canonical(&pn, status);                         \
+}
+
+DEFINE_S390_DIVIDE_TO_INTEGER(float32)
+DEFINE_S390_DIVIDE_TO_INTEGER(float64)
+
 static void __attribute__((constructor)) softfloat_init(void)
 {
     union_float64 ua, ub, uc, ur;
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index c18ab2cb609..66b0c47b5eb 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -1372,4 +1372,15 @@ static inline bool float128_unordered_quiet(float128 a, float128 b,
 *----------------------------------------------------------------------------*/
 float128 float128_default_nan(float_status *status);
 
+#define DECLARE_S390_DIVIDE_TO_INTEGER(floatN)                                 \
+void floatN ## _s390_divide_to_integer(floatN a, floatN b,                     \
+                                       int final_quotient_rounding_mode,       \
+                                       bool mask_underflow, bool mask_inexact, \
+                                       floatN *r, floatN *n,                   \
+                                       uint32_t *cc, int *dxc,                 \
+                                       float_status *status)
+DECLARE_S390_DIVIDE_TO_INTEGER(float32);
+DECLARE_S390_DIVIDE_TO_INTEGER(float64);
+
+
 #endif /* SOFTFLOAT_H */
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1a8a76abb98..6a7426fdac7 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -46,6 +46,7 @@ DEF_HELPER_FLAGS_3(sxb, TCG_CALL_NO_WG, i128, env, i128, i128)
 DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(dxb, TCG_CALL_NO_WG, i128, env, i128, i128)
+DEF_HELPER_6(dib, void, env, i32, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
index 7a3ff501a46..122994960a6 100644
--- a/target/s390x/tcg/fpu_helper.c
+++ b/target/s390x/tcg/fpu_helper.c
@@ -315,6 +315,62 @@ Int128 HELPER(dxb)(CPUS390XState *env, Int128 a, Int128 b)
     return RET128(ret);
 }
 
+void HELPER(dib)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
+                 uint32_t m4, uint32_t bits)
+{
+    int final_quotient_rounding_mode = s390_get_bfp_rounding_mode(env, m4);
+    bool mask_underflow = (env->fpc >> 24) & S390_IEEE_MASK_UNDERFLOW;
+    bool mask_inexact = (env->fpc >> 24) & S390_IEEE_MASK_INEXACT;
+    float32 a32, b32, n32, r32;
+    float64 a64, b64, n64, r64;
+    int dxc = -1;
+    uint32_t cc;
+
+    if (bits == 32) {
+        a32 = env->vregs[r1][0] >> 32;
+        b32 = env->vregs[r2][0] >> 32;
+
+        float32_s390_divide_to_integer(
+            a32, b32,
+            final_quotient_rounding_mode,
+            mask_underflow, mask_inexact,
+            &r32, &n32, &cc, &dxc, &env->fpu_status);
+    } else {
+        a64 = env->vregs[r1][0];
+        b64 = env->vregs[r2][0];
+
+        float64_s390_divide_to_integer(
+            a64, b64,
+            final_quotient_rounding_mode,
+            mask_underflow, mask_inexact,
+            &r64, &n64, &cc, &dxc, &env->fpu_status);
+    }
+
+    /* Flush the results if needed */
+    if ((env->fpu_status.float_exception_flags & float_flag_invalid) &&
+        ((env->fpc >> 24) & S390_IEEE_MASK_INVALID)) {
+        /* The action for invalid operation is "Suppress" */
+    } else {
+        /* The action for other exceptions is "Complete" */
+        if (bits == 32) {
+            env->vregs[r1][0] = deposit64(env->vregs[r1][0], 32, 32, r32);
+            env->vregs[r3][0] = deposit64(env->vregs[r3][0], 32, 32, n32);
+        } else {
+            env->vregs[r1][0] = r64;
+            env->vregs[r3][0] = n64;
+        }
+        env->cc_op = cc;
+    }
+
+    /* Raise an exception if needed */
+    if (dxc == -1) {
+        handle_exceptions(env, false, GETPC());
+    } else {
+        env->fpu_status.float_exception_flags = 0;
+        tcg_s390_data_exception(env, dxc, GETPC());
+    }
+}
+
 /* 32-bit FP multiplication */
 uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index baaafe922e9..0d5392eac54 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -9,7 +9,7 @@
  *  OPC  = (op << 8) | op2 where op is the major, op2 the minor opcode
  *  NAME = name of the opcode, used internally
  *  FMT  = format of the opcode (defined in insn-format.h.inc)
- *  FAC  = facility the opcode is available in (defined in DisasFacility)
+ *  FAC  = facility the opcode is available in (defined in translate.c)
  *  I1   = func in1_xx fills o->in1
  *  I2   = func in2_xx fills o->in2
  *  P    = func prep_xx initializes o->*out*
@@ -361,6 +361,9 @@
     C(0xb91d, DSGFR,   RRE,   Z,   r1p1, r2_32s, r1_P, 0, divs64, 0)
     C(0xe30d, DSG,     RXY_a, Z,   r1p1, m2_64, r1_P, 0, divs64, 0)
     C(0xe31d, DSGF,    RXY_a, Z,   r1p1, m2_32s, r1_P, 0, divs64, 0)
+/* DIVIDE TO INTEGER */
+    D(0xb35b, DIDBR,   RRF_b, Z,   0, 0, 0, 0, dib, 0, 64)
+    D(0xb353, DIEBR,   RRF_b, Z,   0, 0, 0, 0, dib, 0, 32)
 
 /* EXCLUSIVE OR */
     C(0x1700, XR,      RR_a,  Z,   r1, r2, new, r1_32, xor, nz32)
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 540c5a569c0..dee0e710f39 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2283,6 +2283,32 @@ static DisasJumpType op_dxb(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+static DisasJumpType op_dib(DisasContext *s, DisasOps *o)
+{
+    const bool fpe = s390_has_feat(S390_FEAT_FLOATING_POINT_EXT);
+    uint8_t m4 = get_field(s, m4);
+
+    if (get_field(s, r1) == get_field(s, r2) ||
+        get_field(s, r1) == get_field(s, r3) ||
+        get_field(s, r2) == get_field(s, r3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (m4 == 2 || (!fpe && m4 == 3) || m4 > 7) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    gen_helper_dib(tcg_env, tcg_constant_i32(get_field(s, r1)),
+                   tcg_constant_i32(get_field(s, r2)),
+                   tcg_constant_i32(get_field(s, r3)), tcg_constant_i32(m4),
+                   tcg_constant_i32(s->insn->data));
+    set_cc_static(s);
+
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_ear(DisasContext *s, DisasOps *o)
 {
     int r2 = get_field(s, r2);
-- 
2.52.0
Re: [PATCH v2 3/4] target/s390x: Implement DIVIDE TO INTEGER
Posted by Richard Henderson 1 week, 4 days ago
On 1/28/26 02:31, Ilya Leoshkevich wrote:
> +        FloatParts128 a128, b128, *m128, m128_buf, n128, *q128, q128_buf,
> +                      r128, *r128_precise;
> +        int float_exception_flags = 0;
> +        bool is_q128_smallish;
> +        uint32_t r_flags;
> +        int saved_flags;
> +
> +        /* Compute precise quotient */
> +        parts_float_to_float_widen(&a128, a, status);
> +        parts_float_to_float_widen(&b128, b, status);
> +        q128_buf = a128;
> +        q128 = parts_div(&q128_buf, &b128, status);

Why do you need FloatParts128?
You can see an inexact result from float64 with just FloatParts64.
C.f. soft_f64_div.

> +
> +        /* Final or partial case? */
> +        is_q128_smallish = q128->exp < (fmt->frac_size + 1);
> +
> +        /*
> +         * Final quotient is rounded using final-quotient-rounding method, and
> +         * partial quotient is rounded toward zero.
> +         *
> +         * Rounding of partial quotient may be inexact. This is the whole point
> +         * of distinguishing partial quotients, so ignore the exception.
> +         */
> +        n128 = *q128;
> +        saved_flags = status->float_exception_flags;
> +        parts_round_to_int(&n128,
> +                           is_q128_smallish ? final_quotient_rounding_mode :
> +                                              float_round_to_zero,
> +                           0, status, &float128_params);

float128_params is definitely wrong.  The rounding is supposed to be to the target format.

> +        float_exception_flags = saved_flags;
> +        parts_s390_precision_round_normal(&n128, fmt);

parts_round_to_int_normal already takes a rounding mode and frac_size.
It also returns whether or not the rounding was exact.
This appears to be trying to reinvent the wheel.

There's still "... the two integers closest to this precise quotient cannot both be 
represented exactly in the precision of the quotient ..." to contend with.  Is that your 
"smallish" test?  If so, the comment could use some improvement.

> +
> +        /* Compute precise remainder */
> +        m128_buf = b128;
> +        m128 = parts_mul(&m128_buf, &n128, status);
> +        m128->sign = !m128->sign;
> +        status->float_exception_flags = 0;
> +        r128_precise = parts_addsub(m128, &a128, status, false);

Surely parts_muladd_scalbn (with scale 0).


r~
Re: [PATCH v2 3/4] target/s390x: Implement DIVIDE TO INTEGER
Posted by Ilya Leoshkevich 1 week, 4 days ago
On 1/28/26 06:50, Richard Henderson wrote:
> On 1/28/26 02:31, Ilya Leoshkevich wrote:
>> +        FloatParts128 a128, b128, *m128, m128_buf, n128, *q128, 
>> q128_buf,
>> +                      r128, *r128_precise;
>> +        int float_exception_flags = 0;
>> +        bool is_q128_smallish;
>> +        uint32_t r_flags;
>> +        int saved_flags;
>> +
>> +        /* Compute precise quotient */
>> +        parts_float_to_float_widen(&a128, a, status);
>> +        parts_float_to_float_widen(&b128, b, status);
>> +        q128_buf = a128;
>> +        q128 = parts_div(&q128_buf, &b128, status);
>
> Why do you need FloatParts128?
> You can see an inexact result from float64 with just FloatParts64.
> C.f. soft_f64_div.


I thought I needed this extra precision, but turns out that in reality I 
needed it to mask an issue with parts_round_to_int() - see below.


>> +
>> +        /* Final or partial case? */
>> +        is_q128_smallish = q128->exp < (fmt->frac_size + 1);
>> +
>> +        /*
>> +         * Final quotient is rounded using final-quotient-rounding 
>> method, and
>> +         * partial quotient is rounded toward zero.
>> +         *
>> +         * Rounding of partial quotient may be inexact. This is the 
>> whole point
>> +         * of distinguishing partial quotients, so ignore the 
>> exception.
>> +         */
>> +        n128 = *q128;
>> +        saved_flags = status->float_exception_flags;
>> +        parts_round_to_int(&n128,
>> +                           is_q128_smallish ? 
>> final_quotient_rounding_mode :
>> + float_round_to_zero,
>> +                           0, status, &float128_params);
>
> float128_params is definitely wrong.  The rounding is supposed to be 
> to the target format.


Hmm, yes, nothing in this code is about float128, that can't be right.


With some testcases I hit this condition in parts_round_to_int_normal():

     if (a->exp >= frac_size) {
         /* All integral */
         return false;
     }

which makes it a no-op.

I think the code assumes that FloatParts have just been unpacked and all 
low fraction bits are zero, which is not the case for quotient here.

>> +        float_exception_flags = saved_flags;
>> +        parts_s390_precision_round_normal(&n128, fmt);
>
> parts_round_to_int_normal already takes a rounding mode and frac_size.
> It also returns whether or not the rounding was exact.
> This appears to be trying to reinvent the wheel.


Apparently I use this to paper over the above issue, which is not great.

I guess improving parts_round_to_int() to work with non-zero low 
fraction bits would be better.

What do you think?


> There's still "... the two integers closest to this precise quotient 
> cannot both be represented exactly in the precision of the quotient 
> ..." to contend with.  Is that your "smallish" test?  If so, the 
> comment could use some improvement.


Ok.


>> +
>> +        /* Compute precise remainder */
>> +        m128_buf = b128;
>> +        m128 = parts_mul(&m128_buf, &n128, status);
>> +        m128->sign = !m128->sign;
>> +        status->float_exception_flags = 0;
>> +        r128_precise = parts_addsub(m128, &a128, status, false);
>
> Surely parts_muladd_scalbn (with scale 0).


This works and is much shorter, thanks.


> r~

Re: [PATCH v2 3/4] target/s390x: Implement DIVIDE TO INTEGER
Posted by Richard Henderson 1 week, 3 days ago
On 1/29/26 00:19, Ilya Leoshkevich wrote:
> With some testcases I hit this condition in parts_round_to_int_normal():
> 
>      if (a->exp >= frac_size) {
>          /* All integral */
>          return false;
>      }
> 
> which makes it a no-op.
> 
> I think the code assumes that FloatParts have just been unpacked and all low fraction bits 
> are zero, which is not the case for quotient here.
> 
>>> +        float_exception_flags = saved_flags;
>>> +        parts_s390_precision_round_normal(&n128, fmt);
>>
>> parts_round_to_int_normal already takes a rounding mode and frac_size.
>> It also returns whether or not the rounding was exact.
>> This appears to be trying to reinvent the wheel.
> 
> 
> Apparently I use this to paper over the above issue, which is not great.
> 
> I guess improving parts_round_to_int() to work with non-zero low fraction bits would be 
> better.
> 
> What do you think?

Yes indeed.  I think (a->exp >= N) should be sufficient?


r~

Re: [PATCH v2 3/4] target/s390x: Implement DIVIDE TO INTEGER
Posted by Ilya Leoshkevich 1 week, 3 days ago
On 2026-01-28 21:38, Richard Henderson wrote:
> On 1/29/26 00:19, Ilya Leoshkevich wrote:
>> With some testcases I hit this condition in 
>> parts_round_to_int_normal():
>> 
>>      if (a->exp >= frac_size) {
>>          /* All integral */
>>          return false;
>>      }
>> 
>> which makes it a no-op.
>> 
>> I think the code assumes that FloatParts have just been unpacked and 
>> all low fraction bits are zero, which is not the case for quotient 
>> here.
>> 
>>>> +        float_exception_flags = saved_flags;
>>>> +        parts_s390_precision_round_normal(&n128, fmt);
>>> 
>>> parts_round_to_int_normal already takes a rounding mode and 
>>> frac_size.
>>> It also returns whether or not the rounding was exact.
>>> This appears to be trying to reinvent the wheel.
>> 
>> 
>> Apparently I use this to paper over the above issue, which is not 
>> great.
>> 
>> I guess improving parts_round_to_int() to work with non-zero low 
>> fraction bits would be better.
>> 
>> What do you think?
> 
> Yes indeed.  I think (a->exp >= N) should be sufficient?

Seems like this does not work for really large quotients, because we 
still need to trim the fraction bits.
So I'm currently evaluating the following, which looks promising so far:

--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -1118,11 +1118,6 @@ static bool 
partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
          return true;
      }

-    if (a->exp >= frac_size) {
-        /* All integral */
-        return false;
-    }
-
      if (N > 64 && a->exp < N - 64) {
          /*
           * Rounding is not in the low word -- shift lsb to bit 2,
@@ -1133,7 +1128,7 @@ static bool 
partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
          frac_lsb = 1 << 2;
      } else {
          shift_adj = 0;
-        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
+        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> MIN(a->exp, frac_size);
      }

> r~

Re: [PATCH v2 3/4] target/s390x: Implement DIVIDE TO INTEGER
Posted by Richard Henderson 6 days, 12 hours ago
On 1/30/26 04:24, Ilya Leoshkevich wrote:
> On 2026-01-28 21:38, Richard Henderson wrote:
>> On 1/29/26 00:19, Ilya Leoshkevich wrote:
>>> With some testcases I hit this condition in parts_round_to_int_normal():
>>>
>>>      if (a->exp >= frac_size) {
>>>          /* All integral */
>>>          return false;
>>>      }
>>>
>>> which makes it a no-op.
>>>
>>> I think the code assumes that FloatParts have just been unpacked and all low fraction 
>>> bits are zero, which is not the case for quotient here.
>>>
>>>>> +        float_exception_flags = saved_flags;
>>>>> +        parts_s390_precision_round_normal(&n128, fmt);
>>>>
>>>> parts_round_to_int_normal already takes a rounding mode and frac_size.
>>>> It also returns whether or not the rounding was exact.
>>>> This appears to be trying to reinvent the wheel.
>>>
>>>
>>> Apparently I use this to paper over the above issue, which is not great.
>>>
>>> I guess improving parts_round_to_int() to work with non-zero low fraction bits would be 
>>> better.
>>>
>>> What do you think?
>>
>> Yes indeed.  I think (a->exp >= N) should be sufficient?
> 
> Seems like this does not work for really large quotients, because we still need to trim 
> the fraction bits.
> So I'm currently evaluating the following, which looks promising so far:
> 
> --- a/fpu/softfloat-parts.c.inc
> +++ b/fpu/softfloat-parts.c.inc
> @@ -1118,11 +1118,6 @@ static bool partsN(round_to_int_normal)(FloatPartsN *a, 
> FloatRoundMode rmode,
>           return true;
>       }
> 
> -    if (a->exp >= frac_size) {
> -        /* All integral */
> -        return false;
> -    }
> -
>       if (N > 64 && a->exp < N - 64) {
>           /*
>            * Rounding is not in the low word -- shift lsb to bit 2,
> @@ -1133,7 +1128,7 @@ static bool partsN(round_to_int_normal)(FloatPartsN *a, 
> FloatRoundMode rmode,
>           frac_lsb = 1 << 2;
>       } else {
>           shift_adj = 0;
> -        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
> +        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> MIN(a->exp, frac_size);
>       }

Yep, that makes sense.

r~