[PATCH v2 3/4] target/m68k: Implement packed decimal real stores

Richard Henderson posted 4 patches 3 months, 2 weeks ago
[PATCH v2 3/4] target/m68k: Implement packed decimal real stores
Posted by Richard Henderson 3 months, 2 weeks ago
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2488
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/m68k/helper.h     |   1 +
 target/m68k/fpu_helper.c | 112 +++++++++++++++++++++++++++++++++++++++
 target/m68k/translate.c  |  27 ++++++++--
 3 files changed, 137 insertions(+), 3 deletions(-)

diff --git a/target/m68k/helper.h b/target/m68k/helper.h
index 2c71361451..21af6adb39 100644
--- a/target/m68k/helper.h
+++ b/target/m68k/helper.h
@@ -127,6 +127,7 @@ DEF_HELPER_3(chk, void, env, s32, s32)
 DEF_HELPER_4(chk2, void, env, s32, s32, s32)
 
 DEF_HELPER_FLAGS_3(load_pdr_to_fx80, TCG_CALL_NO_RWG, void, env, fp, tl)
+DEF_HELPER_FLAGS_4(store_fx80_to_pdr, TCG_CALL_NO_RWG, void, env, tl, fp, int)
 
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_3(ptest, void, env, i32, i32)
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
index 50b0e36cbe..0c9c3b7b64 100644
--- a/target/m68k/fpu_helper.c
+++ b/target/m68k/fpu_helper.c
@@ -750,6 +750,29 @@ void HELPER(fcosh)(CPUM68KState *env, FPReg *res, FPReg *val)
     res->d = floatx80_cosh(val->d, &env->fp_status);
 }
 
+/* 10**0 through 10**18 */
+static const int64_t i64_pow10[] = {
+    1ll,
+    10ll,
+    100ll,
+    1000ll,
+    10000ll,
+    100000ll,
+    1000000ll,
+    10000000ll,
+    100000000ll,
+    1000000000ll,
+    10000000000ll,
+    100000000000ll,
+    1000000000000ll,
+    10000000000000ll,
+    100000000000000ll,
+    1000000000000000ll,
+    10000000000000000ll,
+    100000000000000000ll,
+    1000000000000000000ll,
+};
+
 static const floatx80 floatx80_pow10[] = {
 #include "floatx80-pow10.c.inc"
 };
@@ -838,3 +861,92 @@ void HELPER(load_pdr_to_fx80)(CPUM68KState *env, FPReg *res, target_ulong addr)
     t = int64_to_floatx80(mant, &env->fp_status),
     res->d = floatx80_scale10i(t, exp, &env->fp_status);
 }
+
+void HELPER(store_fx80_to_pdr)(CPUM68KState *env, target_ulong addr,
+                               FPReg *srcp, int kfactor)
+{
+    floatx80 x = srcp->d;
+    int len, exp2, exp10;
+    uint64_t res_lo;
+    uint32_t res_hi;
+    int64_t y;
+
+    res_lo = x.low;
+    exp2 = x.high & 0x7fff;
+    if (unlikely(exp2 == 0x7fff)) {
+        /* NaN and Inf */
+        res_hi = (uint32_t)x.high << 16;
+        goto done;
+    }
+
+    /* Copy the sign bit to the output, and then x = abs(x). */
+    res_hi = (x.high & 0x8000u) << 16;
+    x.high &= 0x7fff;
+
+    if (exp2 == 0) {
+        if (res_lo == 0) {
+            /* +/- 0 */
+            goto done;
+        }
+        /* denormal */
+        exp2 = -0x3fff - clz64(res_lo);
+    } else {
+        exp2 -= 0x3fff;
+    }
+
+    /*
+     * Begin with an approximation of log2(x) via the base 2 exponent.
+     * Adjust, so that we compute the value scaled by 10**17, which will
+     * allows an integer to be extracted to match the output digits.
+     */
+    exp10 = (exp2 * 30102) / 100000;
+    while (1) {
+        floatx80 t;
+
+        /* kfactor controls the number of output digits */
+        if (kfactor <= 0) {
+            /* kfactor is number of digits right of the decimal point. */
+            len = exp10 - kfactor;
+        } else {
+            /* kfactor is number of significant digits */
+            len = kfactor;
+        }
+        len = MIN(MAX(len, 1), 17);
+
+        /*
+         * Scale, so that we have the requested number of digits
+         * left of the decimal point.  Convert to integer, which
+         * handles the rounding (and may force adjustment of exp10).
+         */
+        t = floatx80_scale10i(x, len - 1 - exp10, &env->fp_status);
+        y = floatx80_to_int64(t, &env->fp_status);
+        if (y < i64_pow10[len - 1]) {
+            exp10--;
+        } else if (y < i64_pow10[len]) {
+            break;
+        } else {
+            exp10++;
+        }
+    }
+
+    /* Output the mantissa. */
+    res_hi |= y / i64_pow10[len - 1];
+    res_lo = 0;
+    for (int i = 1; i < len; ++i) {
+        int64_t d = (y / i64_pow10[len - 1 - i]) % 10;
+        res_lo |= d << (64 - i * 4);
+    }
+
+    /* Output the exponent. */
+    if (exp10 < 0) {
+        res_hi |= 0x40000000;
+        exp10 = -exp10;
+    }
+    for (int i = 24; exp10; i -= 4, exp10 /= 10) {
+        res_hi |= (exp10 % 10) << i;
+    }
+
+ done:
+    cpu_stl_be_data_ra(env, addr, res_hi, GETPC());
+    cpu_stq_be_data_ra(env, addr + 4, res_lo, GETPC());
+}
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 59e7d27393..fb5ecce0c3 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -119,6 +119,8 @@ typedef struct DisasContext {
     int done_mac;
     int writeback_mask;
     TCGv writeback[8];
+    uint16_t insn;
+    uint16_t ext;
     bool ss_active;
 } DisasContext;
 
@@ -671,6 +673,7 @@ static inline int ext_opsize(int ext, int pos)
     case 4: return OS_WORD;
     case 5: return OS_DOUBLE;
     case 6: return OS_BYTE;
+    case 7: return OS_PACKED; /* store, dynamic k-factor */
     default:
         g_assert_not_reached();
     }
@@ -1010,11 +1013,25 @@ static void gen_store_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
         tcg_gen_qemu_st_i64(t64, tmp, index, MO_TEUQ);
         break;
     case OS_PACKED:
+        if (!m68k_feature(s->env, M68K_FEATURE_FPU_PACKED_DECIMAL)) {
+            gen_exception(s, s->base.pc_next, EXCP_FP_UNIMP);
+            break;
+        }
         /*
-         * unimplemented data type on 68040/ColdFire
-         * FIXME if needed for another FPU
+         * For stores we must recover k-factor, either from an
+         * immediate or the low 7 bits of a D register.
          */
-        gen_exception(s, s->base.pc_next, EXCP_FP_UNIMP);
+        switch ((s->ext >> 10) & 7) {
+        case 3:
+            tcg_gen_movi_i32(tmp, sextract32(s->ext, 0, 7));
+            break;
+        case 7:
+            tcg_gen_sextract_i32(tmp, DREG(s->ext, 4), 0, 7);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        gen_helper_store_fx80_to_pdr(tcg_env, addr, fp, tmp);
         break;
     default:
         g_assert_not_reached();
@@ -4940,6 +4957,8 @@ DISAS_INSN(fpu)
     TCGv_ptr cpu_src, cpu_dest;
 
     ext = read_im16(env, s);
+    s->ext = ext;
+
     opmode = ext & 0x7f;
     switch ((ext >> 13) & 7) {
     case 0:
@@ -6042,6 +6061,8 @@ static void m68k_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
     CPUM68KState *env = cpu_env(cpu);
     uint16_t insn = read_im16(env, dc);
 
+    dc->insn = insn;
+    dc->ext = 0;
     opcode_table[insn](env, dc, insn);
     do_writebacks(dc);
 
-- 
2.43.0
Re: [PATCH v2 3/4] target/m68k: Implement packed decimal real stores
Posted by Philippe Mathieu-Daudé 3 months, 1 week ago
Hi Richard,

On 12/8/24 02:44, Richard Henderson wrote:
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2488
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/m68k/helper.h     |   1 +
>   target/m68k/fpu_helper.c | 112 +++++++++++++++++++++++++++++++++++++++
>   target/m68k/translate.c  |  27 ++++++++--
>   3 files changed, 137 insertions(+), 3 deletions(-)
> 
> diff --git a/target/m68k/helper.h b/target/m68k/helper.h
> index 2c71361451..21af6adb39 100644
> --- a/target/m68k/helper.h
> +++ b/target/m68k/helper.h
> @@ -127,6 +127,7 @@ DEF_HELPER_3(chk, void, env, s32, s32)
>   DEF_HELPER_4(chk2, void, env, s32, s32, s32)
>   
>   DEF_HELPER_FLAGS_3(load_pdr_to_fx80, TCG_CALL_NO_RWG, void, env, fp, tl)
> +DEF_HELPER_FLAGS_4(store_fx80_to_pdr, TCG_CALL_NO_RWG, void, env, tl, fp, int)
>   
>   #if !defined(CONFIG_USER_ONLY)
>   DEF_HELPER_3(ptest, void, env, i32, i32)
> diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
> index 50b0e36cbe..0c9c3b7b64 100644
> --- a/target/m68k/fpu_helper.c
> +++ b/target/m68k/fpu_helper.c
> @@ -750,6 +750,29 @@ void HELPER(fcosh)(CPUM68KState *env, FPReg *res, FPReg *val)
>       res->d = floatx80_cosh(val->d, &env->fp_status);
>   }
>   
> +/* 10**0 through 10**18 */
> +static const int64_t i64_pow10[] = {
> +    1ll,
> +    10ll,
> +    100ll,
> +    1000ll,
> +    10000ll,
> +    100000ll,
> +    1000000ll,
> +    10000000ll,
> +    100000000ll,
> +    1000000000ll,
> +    10000000000ll,
> +    100000000000ll,
> +    1000000000000ll,
> +    10000000000000ll,
> +    100000000000000ll,
> +    1000000000000000ll,
> +    10000000000000000ll,
> +    100000000000000000ll,
> +    1000000000000000000ll,

10**18 is never used, is that expected?

Could we define KFACTOR_MAX = 17 and use it as
array size?

> +};
> +
>   static const floatx80 floatx80_pow10[] = {
>   #include "floatx80-pow10.c.inc"
>   };
> @@ -838,3 +861,92 @@ void HELPER(load_pdr_to_fx80)(CPUM68KState *env, FPReg *res, target_ulong addr)
>       t = int64_to_floatx80(mant, &env->fp_status),
>       res->d = floatx80_scale10i(t, exp, &env->fp_status);
>   }
> +
> +void HELPER(store_fx80_to_pdr)(CPUM68KState *env, target_ulong addr,
> +                               FPReg *srcp, int kfactor)
> +{
> +    floatx80 x = srcp->d;
> +    int len, exp2, exp10;
> +    uint64_t res_lo;
> +    uint32_t res_hi;
> +    int64_t y;
> +
> +    res_lo = x.low;
> +    exp2 = x.high & 0x7fff;
> +    if (unlikely(exp2 == 0x7fff)) {
> +        /* NaN and Inf */
> +        res_hi = (uint32_t)x.high << 16;
> +        goto done;
> +    }
> +
> +    /* Copy the sign bit to the output, and then x = abs(x). */
> +    res_hi = (x.high & 0x8000u) << 16;
> +    x.high &= 0x7fff;
> +
> +    if (exp2 == 0) {
> +        if (res_lo == 0) {
> +            /* +/- 0 */
> +            goto done;
> +        }
> +        /* denormal */
> +        exp2 = -0x3fff - clz64(res_lo);
> +    } else {
> +        exp2 -= 0x3fff;
> +    }
> +
> +    /*
> +     * Begin with an approximation of log2(x) via the base 2 exponent.
> +     * Adjust, so that we compute the value scaled by 10**17, which will
> +     * allows an integer to be extracted to match the output digits.
> +     */
> +    exp10 = (exp2 * 30102) / 100000;
> +    while (1) {
> +        floatx80 t;
> +
> +        /* kfactor controls the number of output digits */
> +        if (kfactor <= 0) {
> +            /* kfactor is number of digits right of the decimal point. */
> +            len = exp10 - kfactor;
> +        } else {
> +            /* kfactor is number of significant digits */
> +            len = kfactor;
> +        }
> +        len = MIN(MAX(len, 1), 17);
> +
> +        /*
> +         * Scale, so that we have the requested number of digits
> +         * left of the decimal point.  Convert to integer, which
> +         * handles the rounding (and may force adjustment of exp10).
> +         */
> +        t = floatx80_scale10i(x, len - 1 - exp10, &env->fp_status);
> +        y = floatx80_to_int64(t, &env->fp_status);
> +        if (y < i64_pow10[len - 1]) {
> +            exp10--;
> +        } else if (y < i64_pow10[len]) {
> +            break;
> +        } else {
> +            exp10++;
> +        }
> +    }
> +
> +    /* Output the mantissa. */
> +    res_hi |= y / i64_pow10[len - 1];
> +    res_lo = 0;
> +    for (int i = 1; i < len; ++i) {
> +        int64_t d = (y / i64_pow10[len - 1 - i]) % 10;
> +        res_lo |= d << (64 - i * 4);
> +    }
> +
> +    /* Output the exponent. */
> +    if (exp10 < 0) {
> +        res_hi |= 0x40000000;
> +        exp10 = -exp10;
> +    }
> +    for (int i = 24; exp10; i -= 4, exp10 /= 10) {
> +        res_hi |= (exp10 % 10) << i;
> +    }
> +
> + done:
> +    cpu_stl_be_data_ra(env, addr, res_hi, GETPC());
> +    cpu_stq_be_data_ra(env, addr + 4, res_lo, GETPC());
> +}
> diff --git a/target/m68k/translate.c b/target/m68k/translate.c
> index 59e7d27393..fb5ecce0c3 100644
> --- a/target/m68k/translate.c
> +++ b/target/m68k/translate.c
> @@ -119,6 +119,8 @@ typedef struct DisasContext {
>       int done_mac;
>       int writeback_mask;
>       TCGv writeback[8];
> +    uint16_t insn;
> +    uint16_t ext;

Maybe worth commenting here what this field is used for?

>       bool ss_active;
>   } DisasContext;
>   
> @@ -671,6 +673,7 @@ static inline int ext_opsize(int ext, int pos)
>       case 4: return OS_WORD;
>       case 5: return OS_DOUBLE;
>       case 6: return OS_BYTE;
> +    case 7: return OS_PACKED; /* store, dynamic k-factor */
>       default:
>           g_assert_not_reached();
>       }
> @@ -1010,11 +1013,25 @@ static void gen_store_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
>           tcg_gen_qemu_st_i64(t64, tmp, index, MO_TEUQ);
>           break;
>       case OS_PACKED:
> +        if (!m68k_feature(s->env, M68K_FEATURE_FPU_PACKED_DECIMAL)) {
> +            gen_exception(s, s->base.pc_next, EXCP_FP_UNIMP);
> +            break;
> +        }
>           /*
> -         * unimplemented data type on 68040/ColdFire
> -         * FIXME if needed for another FPU
> +         * For stores we must recover k-factor, either from an
> +         * immediate or the low 7 bits of a D register.
>            */
> -        gen_exception(s, s->base.pc_next, EXCP_FP_UNIMP);
> +        switch ((s->ext >> 10) & 7) {
> +        case 3:
> +            tcg_gen_movi_i32(tmp, sextract32(s->ext, 0, 7));
> +            break;
> +        case 7:
> +            tcg_gen_sextract_i32(tmp, DREG(s->ext, 4), 0, 7);
> +            break;
> +        default:
> +            g_assert_not_reached();
> +        }
> +        gen_helper_store_fx80_to_pdr(tcg_env, addr, fp, tmp);
>           break;
>       default:
>           g_assert_not_reached();
> @@ -4940,6 +4957,8 @@ DISAS_INSN(fpu)
>       TCGv_ptr cpu_src, cpu_dest;
>   
>       ext = read_im16(env, s);
> +    s->ext = ext;
> +
>       opmode = ext & 0x7f;
>       switch ((ext >> 13) & 7) {
>       case 0:
> @@ -6042,6 +6061,8 @@ static void m68k_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
>       CPUM68KState *env = cpu_env(cpu);
>       uint16_t insn = read_im16(env, dc);
>   
> +    dc->insn = insn;
> +    dc->ext = 0;
>       opcode_table[insn](env, dc, insn);
>       do_writebacks(dc);
>   

Otherwise patch LGTM.
Re: [PATCH v2 3/4] target/m68k: Implement packed decimal real stores
Posted by Richard Henderson 3 months, 1 week ago
On 8/16/24 16:37, Philippe Mathieu-Daudé wrote:
> Hi Richard,
> 
> On 12/8/24 02:44, Richard Henderson wrote:
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2488
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   target/m68k/helper.h     |   1 +
>>   target/m68k/fpu_helper.c | 112 +++++++++++++++++++++++++++++++++++++++
>>   target/m68k/translate.c  |  27 ++++++++--
>>   3 files changed, 137 insertions(+), 3 deletions(-)
>>
>> diff --git a/target/m68k/helper.h b/target/m68k/helper.h
>> index 2c71361451..21af6adb39 100644
>> --- a/target/m68k/helper.h
>> +++ b/target/m68k/helper.h
>> @@ -127,6 +127,7 @@ DEF_HELPER_3(chk, void, env, s32, s32)
>>   DEF_HELPER_4(chk2, void, env, s32, s32, s32)
>>   DEF_HELPER_FLAGS_3(load_pdr_to_fx80, TCG_CALL_NO_RWG, void, env, fp, tl)
>> +DEF_HELPER_FLAGS_4(store_fx80_to_pdr, TCG_CALL_NO_RWG, void, env, tl, fp, int)
>>   #if !defined(CONFIG_USER_ONLY)
>>   DEF_HELPER_3(ptest, void, env, i32, i32)
>> diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
>> index 50b0e36cbe..0c9c3b7b64 100644
>> --- a/target/m68k/fpu_helper.c
>> +++ b/target/m68k/fpu_helper.c
>> @@ -750,6 +750,29 @@ void HELPER(fcosh)(CPUM68KState *env, FPReg *res, FPReg *val)
>>       res->d = floatx80_cosh(val->d, &env->fp_status);
>>   }
>> +/* 10**0 through 10**18 */
>> +static const int64_t i64_pow10[] = {
>> +    1ll,
>> +    10ll,
>> +    100ll,
>> +    1000ll,
>> +    10000ll,
>> +    100000ll,
>> +    1000000ll,
>> +    10000000ll,
>> +    100000000ll,
>> +    1000000000ll,
>> +    10000000000ll,
>> +    100000000000ll,
>> +    1000000000000ll,
>> +    10000000000000ll,
>> +    100000000000000ll,
>> +    1000000000000000ll,
>> +    10000000000000000ll,
>> +    100000000000000000ll,
>> +    1000000000000000000ll,
> 
> 10**18 is never used, is that expected?

A previous version did.

> Could we define KFACTOR_MAX = 17 and use it as
> array size?

Seems reasonable.

>> @@ -119,6 +119,8 @@ typedef struct DisasContext {
>>       int done_mac;
>>       int writeback_mask;
>>       TCGv writeback[8];
>> +    uint16_t insn;
>> +    uint16_t ext;
> 
> Maybe worth commenting here what this field is used for?

Ok.

r~