target/mips: Add MAC2008 support

[PATCH] target/mips: Add MAC2008 support

Posted by Jiaxun Yang 5 years, 10 months ago

MAC2008 was introduced in MIPS Release 3 but removed in MIPS Release 5.
However, some processors do implement this feature: some Ingenic MCUs
can configure the MAC2008 status at runtime, while the whole
Loongson-64 family is MAC2008 only.

The FCSR.MAC2008 bit indicates that the FMA family of instructions on
these processors has fused behavior, similar to FMA in Release 6,
so we can reuse the same helpers for them.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
 target/mips/cpu.h        |  1 +
 target/mips/fpu_helper.c | 61 +++++++++++++++++++++------------
 target/mips/helper.h     | 12 +++----
 target/mips/translate.c  | 74 +++++++++++++++++++++++++++++++++-------
 4 files changed, 107 insertions(+), 41 deletions(-)

diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 94d01ea798..b20e6e3387 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -63,6 +63,7 @@ struct CPUMIPSFPUContext {
     uint32_t fcr31_rw_bitmask;
     uint32_t fcr31;
 #define FCR31_FS 24
+#define FCR31_MAC2008 20
 #define FCR31_ABS2008 19
 #define FCR31_NAN2008 18
 #define SET_FP_COND(num, env)     do { ((env).fcr31) |=                 \
diff --git a/target/mips/fpu_helper.c b/target/mips/fpu_helper.c
index 5287c86c61..2e50d50f36 100644
--- a/target/mips/fpu_helper.c
+++ b/target/mips/fpu_helper.c
@@ -1357,7 +1357,7 @@ FLOAT_MINMAX(mina_d, 64, minnummag)
     }                                                                \
 }
 
-/* FMA based operations */
+/* FMA based operations (both unfused and fused) */
 #define FLOAT_FMA(name, type)                                        \
 uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,              \
                                      uint64_t fdt0, uint64_t fdt1,   \
@@ -1392,33 +1392,52 @@ uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,             \
     UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type);                 \
     update_fcr31(env, GETPC());                                      \
     return ((uint64_t)fsth0 << 32) | fst0;                           \
+}                                                                    \
+uint64_t helper_float_ ## name ## f_d(CPUMIPSState *env,             \
+                                     uint64_t fdt0, uint64_t fdt1,   \
+                                     uint64_t fdt2)                  \
+{                                                                    \
+    fdt0 = float64_muladd(fdt0, fdt1, fdt2, type,                    \
+                            &env->active_fpu.fp_status);             \
+    update_fcr31(env, GETPC());                                      \
+    return fdt0;                                                     \
+}                                                                    \
+                                                                     \
+uint32_t helper_float_ ## name ## f_s(CPUMIPSState *env,             \
+                                     uint32_t fst0, uint32_t fst1,   \
+                                     uint32_t fst2)                  \
+{                                                                    \
+    fst0 = float32_muladd(fst0, fst1, fst2, type,                    \
+                            &env->active_fpu.fp_status);             \
+    update_fcr31(env, GETPC());                                      \
+    return fst0;                                                     \
+}                                                                    \
+                                                                     \
+uint64_t helper_float_ ## name ## f_ps(CPUMIPSState *env,            \
+                                      uint64_t fdt0, uint64_t fdt1,  \
+                                      uint64_t fdt2)                 \
+{                                                                    \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                               \
+    uint32_t fsth0 = fdt0 >> 32;                                     \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                               \
+    uint32_t fsth1 = fdt1 >> 32;                                     \
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                               \
+    uint32_t fsth2 = fdt2 >> 32;                                     \
+                                                                     \
+    fst0 = float32_muladd(fst0, fst1, fst2, type,                    \
+                            &env->active_fpu.fp_status);             \
+    fsth0 = float32_muladd(fsth0, fsth1, fsth2, type,                \
+                            &env->active_fpu.fp_status);             \
+    update_fcr31(env, GETPC());                                      \
+    return ((uint64_t)fsth0 << 32) | fst0;                           \
 }
+
 FLOAT_FMA(madd, 0)
 FLOAT_FMA(msub, float_muladd_negate_c)
 FLOAT_FMA(nmadd, float_muladd_negate_result)
 FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
 #undef FLOAT_FMA
 
-#define FLOAT_FMADDSUB(name, bits, muladd_arg)                          \
-uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env,             \
-                                         uint ## bits ## _t fs,         \
-                                         uint ## bits ## _t ft,         \
-                                         uint ## bits ## _t fd)         \
-{                                                                       \
-    uint ## bits ## _t fdret;                                           \
-                                                                        \
-    fdret = float ## bits ## _muladd(fs, ft, fd, muladd_arg,            \
-                                     &env->active_fpu.fp_status);       \
-    update_fcr31(env, GETPC());                                         \
-    return fdret;                                                       \
-}
-
-FLOAT_FMADDSUB(maddf_s, 32, 0)
-FLOAT_FMADDSUB(maddf_d, 64, 0)
-FLOAT_FMADDSUB(msubf_s, 32, float_muladd_negate_product)
-FLOAT_FMADDSUB(msubf_d, 64, float_muladd_negate_product)
-#undef FLOAT_FMADDSUB
-
 /* compare operations */
 #define FOP_COND_D(op, cond)                                   \
 void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
diff --git a/target/mips/helper.h b/target/mips/helper.h
index 84fdd9fd27..56aad63931 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -232,13 +232,6 @@ DEF_HELPER_3(float_mulr_ps, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(float_class_s, TCG_CALL_NO_RWG_SE, i32, env, i32)
 DEF_HELPER_FLAGS_2(float_class_d, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
-#define FOP_PROTO(op)                                     \
-DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32) \
-DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)
-FOP_PROTO(maddf)
-FOP_PROTO(msubf)
-#undef FOP_PROTO
-
 #define FOP_PROTO(op)                                \
 DEF_HELPER_3(float_ ## op ## _s, i32, env, i32, i32) \
 DEF_HELPER_3(float_ ## op ## _d, i64, env, i64, i64)
@@ -305,7 +298,10 @@ FOP_PROTO(rsqrt2)
 #define FOP_PROTO(op)                                      \
 DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32)  \
 DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)  \
-DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
+DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64) \
+DEF_HELPER_4(float_ ## op ## f_s, i32, env, i32, i32, i32)  \
+DEF_HELPER_4(float_ ## op ## f_d, i64, env, i64, i64, i64)  \
+DEF_HELPER_4(float_ ## op ## f_ps, i64, env, i64, i64, i64)
 FOP_PROTO(madd)
 FOP_PROTO(msub)
 FOP_PROTO(nmadd)
diff --git a/target/mips/translate.c b/target/mips/translate.c
index d745bd2803..3ce159df97 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -2547,6 +2547,7 @@ typedef struct DisasContext {
     bool mrp;
     bool nan2008;
     bool abs2008;
+    bool mac2008;
     bool saar;
     bool mi;
     int gi;
@@ -12776,7 +12777,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12794,7 +12799,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_maddf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12811,7 +12820,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_maddf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12828,7 +12841,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_msubf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12846,7 +12863,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_msubf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12863,7 +12884,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_msubf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12880,7 +12905,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmaddf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12898,7 +12927,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmaddf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12915,7 +12948,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmaddf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12932,7 +12969,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmsubf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12950,7 +12991,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmsubf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12967,7 +13012,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmsubf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -30807,6 +30856,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->mrp = (env->CP0_Config5 >> CP0C5_MRP) & 1;
     ctx->nan2008 = (env->active_fpu.fcr31 >> FCR31_NAN2008) & 1;
     ctx->abs2008 = (env->active_fpu.fcr31 >> FCR31_ABS2008) & 1;
+    ctx->mac2008 = (env->active_fpu.fcr31 >> FCR31_MAC2008) & 1;
     ctx->mi = (env->CP0_Config5 >> CP0C5_MI) & 1;
     ctx->gi = (env->CP0_Config5 >> CP0C5_GI) & 3;
     restore_cpu_state(env, ctx);
-- 
2.26.0.rc2

Re: [PATCH] target/mips: Add MAC2008 support

Posted by Richard Henderson 5 years, 10 months ago

On 3/28/20 2:08 AM, Jiaxun Yang wrote:
> -            gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
> +            if (ctx->mac2008) {
> +                gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
> +            } else {
> +                gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1, fp2);
> +            }
>  

Surely this test is backward, that mac2008 invokes maddf.


r~

Re: [PATCH] target/mips: Add MAC2008 support

Posted by Jiaxun Yang 5 years, 10 months ago


于 2020年3月29日 GMT+08:00 上午3:09:16, Richard Henderson <richard.henderson@linaro.org> 写到:
>On 3/28/20 2:08 AM, Jiaxun Yang wrote:
>> -            gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
>> +            if (ctx->mac2008) {
>> +                gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1,
>fp2);
>> +            } else {
>> +                gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1,
>fp2);
>> +            }
>>  
>
>Surely this test is backward, that mac2008 invokes maddf.

Sorry, that was my mistake.
I will fix it in v2.

>
>
>r~

-- 
Jiaxun Yang