[v11] Add LoongArch linux-user emulation support

[PATCH v11 11/26] target/loongarch: Add floating point comparison instruction translation

Posted by Song Gao 2 years, 11 months ago

This includes:
- FCMP.cond.{S/D}

Signed-off-by: Song Gao <gaosong@loongson.cn>
Signed-off-by: Xiaojuan Yang <yangxiaojuan@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/loongarch/fpu_helper.c                | 60 ++++++++++++++++++++++++++++
 target/loongarch/helper.h                    |  9 +++++
 target/loongarch/insn_trans/trans_fcmp.c.inc | 56 ++++++++++++++++++++++++++
 target/loongarch/insns.decode                |  8 ++++
 target/loongarch/internals.h                 |  5 +++
 target/loongarch/translate.c                 |  1 +
 6 files changed, 139 insertions(+)
 create mode 100644 target/loongarch/insn_trans/trans_fcmp.c.inc

diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c
index d0ef675..807ffd0 100644
--- a/target/loongarch/fpu_helper.c
+++ b/target/loongarch/fpu_helper.c
@@ -403,3 +403,63 @@ uint64_t helper_fmuladd_d(CPULoongArchState *env, uint64_t fj,
     update_fcsr0(env, GETPC());
     return fd;
 }
+
+static uint64_t fcmp_common(CPULoongArchState *env, FloatRelation cmp,
+                            uint32_t flags)
+{
+    bool ret;
+
+    switch (cmp) {
+    case float_relation_less:
+        ret = (flags & FCMP_LT);
+        break;
+    case float_relation_equal:
+        ret = (flags & FCMP_EQ);
+        break;
+    case float_relation_greater:
+        ret = (flags & FCMP_GT);
+        break;
+    case float_relation_unordered:
+        ret = (flags & FCMP_UN);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    update_fcsr0(env, GETPC());
+
+    return ret;
+}
+
+/* fcmp_cXXX_s */
+uint64_t helper_fcmp_c_s(CPULoongArchState *env, uint64_t fj,
+                         uint64_t fk, uint32_t flags)
+{
+    FloatRelation cmp = float32_compare_quiet((uint32_t)fj,
+                                              (uint32_t)fk, &env->fp_status);
+    return fcmp_common(env, cmp, flags);
+}
+
+/* fcmp_sXXX_s */
+uint64_t helper_fcmp_s_s(CPULoongArchState *env, uint64_t fj,
+                         uint64_t fk, uint32_t flags)
+{
+    FloatRelation cmp = float32_compare((uint32_t)fj,
+                                        (uint32_t)fk, &env->fp_status);
+    return fcmp_common(env, cmp, flags);
+}
+
+/* fcmp_cXXX_d */
+uint64_t helper_fcmp_c_d(CPULoongArchState *env, uint64_t fj,
+                         uint64_t fk, uint32_t flags)
+{
+    FloatRelation cmp = float64_compare_quiet(fj, fk, &env->fp_status);
+    return fcmp_common(env, cmp, flags);
+}
+
+/* fcmp_sXXX_d */
+uint64_t helper_fcmp_s_d(CPULoongArchState *env, uint64_t fj,
+                         uint64_t fk, uint32_t flags)
+{
+    FloatRelation cmp = float64_compare(fj, fk, &env->fp_status);
+    return fcmp_common(env, cmp, flags);
+}
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index d6bb412..30b270a 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -52,3 +52,12 @@ DEF_HELPER_2(frecip_d, i64, env, i64)
 
 DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, i64, env, i64)
 DEF_HELPER_FLAGS_2(fclass_d, TCG_CALL_NO_RWG_SE, i64, env, i64)
+
+/* fcmp.cXXX.s */
+DEF_HELPER_4(fcmp_c_s, i64, env, i64, i64, i32)
+/* fcmp.sXXX.s */
+DEF_HELPER_4(fcmp_s_s, i64, env, i64, i64, i32)
+/* fcmp.cXXX.d */
+DEF_HELPER_4(fcmp_c_d, i64, env, i64, i64, i32)
+/* fcmp.sXXX.d */
+DEF_HELPER_4(fcmp_s_d, i64, env, i64, i64, i32)
diff --git a/target/loongarch/insn_trans/trans_fcmp.c.inc b/target/loongarch/insn_trans/trans_fcmp.c.inc
new file mode 100644
index 0000000..ce39c07
--- /dev/null
+++ b/target/loongarch/insn_trans/trans_fcmp.c.inc
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+/* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */
+static uint32_t get_fcmp_flags(int cond)
+{
+    uint32_t flags = 0;
+
+    if (cond & 0x1) {
+        flags |= FCMP_LT;
+    }
+    if (cond & 0x2) {
+        flags |= FCMP_EQ;
+    }
+    if (cond & 0x4) {
+        flags |= FCMP_UN;
+    }
+    if (cond & 0x8) {
+        flags |= FCMP_GT | FCMP_LT;
+    }
+    return flags;
+}
+
+static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a)
+{
+    TCGv var = tcg_temp_new();
+    uint32_t flags;
+    void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
+
+    fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s);
+    flags = get_fcmp_flags(a->fcond >> 1);
+
+    fn(var, cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk], tcg_constant_i32(flags));
+
+    tcg_gen_st8_tl(var, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));
+    tcg_temp_free(var);
+    return true;
+}
+
+static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a)
+{
+    TCGv var = tcg_temp_new();
+    uint32_t flags;
+    void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
+    fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d);
+    flags = get_fcmp_flags(a->fcond >> 1);
+
+    fn(var, cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk], tcg_constant_i32(flags));
+
+    tcg_gen_st8_tl(var, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));
+
+    tcg_temp_free(var);
+    return true;
+}
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 28a2f5e..17e1df8 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -26,6 +26,7 @@
 &ff           fd fj
 &fff          fd fj fk
 &ffff         fd fj fk fa
+&cff_fcond    cd fj fk fcond
 
 #
 # Formats
@@ -50,6 +51,7 @@
 @ff               .... ........ ..... ..... fj:5 fd:5    &ff
 @fff               .... ........ ..... fk:5 fj:5 fd:5    &fff
 @ffff               .... ........ fa:5 fk:5 fj:5 fd:5    &ffff
+@cff_fcond    .... ........ fcond:5 fk:5 fj:5 .. cd:3    &cff_fcond
 
 #
 # Fixed point arithmetic operation instruction
@@ -311,3 +313,9 @@ fcopysign_s     0000 00010001 00101 ..... ..... .....    @fff
 fcopysign_d     0000 00010001 00110 ..... ..... .....    @fff
 fclass_s        0000 00010001 01000 01101 ..... .....    @ff
 fclass_d        0000 00010001 01000 01110 ..... .....    @ff
+
+#
+# Floating point compare instruction
+#
+fcmp_cond_s     0000 11000001 ..... ..... ..... 00 ...   @cff_fcond
+fcmp_cond_d     0000 11000010 ..... ..... ..... 00 ...   @cff_fcond
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 17219d4..e9e6374 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -8,6 +8,11 @@
 #ifndef LOONGARCH_INTERNALS_H
 #define LOONGARCH_INTERNALS_H
 
+#define FCMP_LT   0x0001  /* fp0 < fp1 */
+#define FCMP_EQ   0x0010  /* fp0 = fp1 */
+#define FCMP_UN   0x0100  /* unordered */
+#define FCMP_GT   0x1000  /* fp0 > fp1 */
+
 void loongarch_translate_init(void);
 
 void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index b7ace9e..fd43e1b 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -191,6 +191,7 @@ static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext)
 #include "insn_trans/trans_atomic.c.inc"
 #include "insn_trans/trans_extra.c.inc"
 #include "insn_trans/trans_farith.c.inc"
+#include "insn_trans/trans_fcmp.c.inc"
 
 static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
-- 
1.8.3.1

Re: [PATCH v11 11/26] target/loongarch: Add floating point comparison instruction translation

Posted by Richard Henderson 2 years, 11 months ago

On 11/19/21 7:13 AM, Song Gao wrote:
> +static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a)
> +{
> +    TCGv var = tcg_temp_new();
> +    uint32_t flags;
> +    void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
> +
> +    fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s);
> +    flags = get_fcmp_flags(a->fcond >> 1);
> +
> +    fn(var, cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk], tcg_constant_i32(flags));
> +
> +    tcg_gen_st8_tl(var, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));

No need to mask cd; the decode took care of that.

> +#define FCMP_LT   0x0001  /* fp0 < fp1 */
> +#define FCMP_EQ   0x0010  /* fp0 = fp1 */
> +#define FCMP_UN   0x0100  /* unordered */
> +#define FCMP_GT   0x1000  /* fp0 > fp1 */

Any reason why these bits are not sequential?


r~

Re: [PATCH v11 11/26] target/loongarch: Add floating point comparison instruction translation

Posted by gaosong 2 years, 11 months ago

Hi Richard,

On 2021/11/20 下午5:02, Richard Henderson wrote:
>
>> +#define FCMP_LT 0x0001  /* fp0 < fp1 */
>> +#define FCMP_EQ   0x0010  /* fp0 = fp1 */
>> +#define FCMP_UN   0x0100  /* unordered */
>> +#define FCMP_GT   0x1000  /* fp0 > fp1 */
>
> Any reason why these bits are not sequential? 
I think this is consistent with the description in Table 9,  and the 
FCMP_GT is need 0x7.
In  get_fcmp_flags(int cond)  the param 'cond' is table 9  COND >> 1,
table 9
COND          True Condition
0x2              LT
0x4              EQ
0x6              EQ  LT
0x8              UN
0x10            GT LT
...
COND >> 1   Flags
0x1              FCMP_LT                            FCMP_LT    0x1
0x2              FCMP_EQ                           FCMP_EQ    0x2
0x3              FCMP_EQ  |  FCMP_LT
0x4              FCMP_UN                          FCMP_UN   0x4
0x8              FCMP_GT |  FCMP_LT          FCMP_GT    0x7
...
so our definitions :
#define FCMP_LT    0x1  /* fp0 < fp1 */
#define FCMP_EQ   0x2  /* fp0 = fp1 */
#define FCMP_UN   0x4  /* unordered */
#define FCMP_GT   0x7  /* fp0 > fp1 */

get_fcmp_flags:
/* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */
static uint32_t get_fcmp_flags(int cond)
{
     uint32_t flags = 0;

     if (cond & 0x1) {
         flags |= FCMP_LT;
     }
     if (cond & 0x2) {
         flags |= FCMP_EQ;
     }
     if (cond & 0x4) {
         flags |= FCMP_UN;
     }
     if (cond & 0x8) {
         flags |= FCMP_GT | FCMP_LT;
     }
     return flags;
}

If not .

We should like:

#define FCMP_LT    0x1  /* fp0 < fp1 */
#define FCMP_EQ   0x2  /* fp0 = fp1 */
#define FCMP_UN   0x3  /* unordered */
#define FCMP_GT   0x4  /* fp0 > fp1 */

static uint32_t get_fcmp_flags(int cond)
{
     uint32_t flags = 0;

     if (cond & 0x1) {
         flags |= FCMP_LT;
     }
     if (cond & 0x2) {
         flags |= FCMP_EQ;
     }
     if (cond & 0x3) {
         flags |= FCMP_UN;
     }
     if (cond & 0x4) {
         flags |= FCMP_GT;
     }
     return flags;
}

Is this right?

Thanks
Song Gao

Re: [PATCH v11 11/26] target/loongarch: Add floating point comparison instruction translation

Posted by Richard Henderson 2 years, 11 months ago

On 11/30/21 9:22 AM, gaosong wrote:
> On 2021/11/20 下午5:02, Richard Henderson wrote:
>>
>>> +#define FCMP_LT 0x0001  /* fp0 < fp1 */
>>> +#define FCMP_EQ   0x0010  /* fp0 = fp1 */
>>> +#define FCMP_UN   0x0100  /* unordered */
>>> +#define FCMP_GT   0x1000  /* fp0 > fp1 */
>>
>> Any reason why these bits are not sequential? 
...
> We should like:
> 
> #define FCMP_LT    0x1  /* fp0 < fp1 */
> #define FCMP_EQ   0x2  /* fp0 = fp1 */
> #define FCMP_UN   0x3  /* unordered */
> #define FCMP_GT   0x4  /* fp0 > fp1 */
> 
> static uint32_t get_fcmp_flags(int cond)
> {
>      uint32_t flags = 0;
> 
>      if (cond & 0x1) {
>          flags |= FCMP_LT;
>      }
>      if (cond & 0x2) {
>          flags |= FCMP_EQ;
>      }
>      if (cond & 0x3) {
>          flags |= FCMP_UN;
>      }
>      if (cond & 0x4) {
>          flags |= FCMP_GT;
>      }
>      return flags;
> }
> 
> Is this right?

No.  You're not converting anything here.

I think you should simply replace "0x" with "0b" so that the bits of FCMP are more 
compact.  I assume that's what you were originally thinking.

#define FCMP_LT   0b0001  /* fp0 < fp1 */
#define FCMP_EQ   0b0010  /* fp0 = fp1 */
#define FCMP_UN   0b0100  /* unordered */
#define FCMP_GT   0b1000  /* fp0 > fp1 */

or identically with the form (1 << 0), (1 << 1), etc.


r~

Re: [PATCH v11 11/26] target/loongarch: Add floating point comparison instruction translation

Posted by gaosong 2 years, 11 months ago

Hi Richard.

On 2021/11/30 下午4:37, Richard Henderson wrote:
>
> I think you should simply replace "0x" with "0b" so that the bits of 
> FCMP are more compact.  I assume that's what you were originally 
> thinking.
Ooh,  suddenly become clear-minded.

Thanks
Song Gao