[PATCH v2] target/sparc: emulate floating point queue when raising fp traps

Carl Hauser posted 1 patch 3 months, 1 week ago
There is a newer version of this series
target/sparc/cpu.h          |  8 ++++++++
target/sparc/fop_helper.c   | 17 ++++++++++++++++-
target/sparc/helper.h       |  4 ++++
target/sparc/insns.decode   |  4 ++--
target/sparc/int32_helper.c | 13 +++++++++++++
target/sparc/translate.c    | 14 ++++++++++----
6 files changed, 53 insertions(+), 7 deletions(-)
[PATCH v2] target/sparc: emulate floating point queue when raising fp traps
Posted by Carl Hauser 3 months, 1 week ago

 From 9265233081fae546c0459792598a9f1688ddb020 Mon Sep 17 00:00:00 2001
From: Carl Hauser <chauser@pullman.com>
Date: Sat, 10 Aug 2024 15:09:39 -0700
Subject: [PATCH v2] target/sparc: emulate floating point queue when 
raising fp
  traps

Sparc 32-bit machines perform floating point operations in an
asynchronous co-processor. When a FP operation traps for any reason
the entire set of pending operations is dumped to memory as
the "floating point queue" and then processed by kernel trap
handling. SunOS and at least early Solaris 2 releases only
handle traps correctly if they can find the faulting operation
at the head of the floating point queue. Qemu did not previously
implement FP queue handling. This commit adds: additional fields
to the processor state to contain a single-element FP queue;
logic in trap handling to populate the FP queue when a FP
trap occurs; and implementation of the STDFQ instruction that
the kernel uses to extract a FP queue element from the processor
state to memory.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2340
Signed-off-by: Carl Hauser <chauser@pullman.com>
---
Addressed comments about unrelated blank line insertions, and
unneeded #ifdefs. Put lines in insns.c back to original order,
but now with comments and arg specifier for STDFQ.

As for testing unimplemented operations on linux and NetBSD,
my problem is that I can't get gcc to generate quad-precision
code on them at all. Both gcc's generate the same code for long double
that they generate for double. And neither accepts _Float_128 as
a valid type. While I have reasonable confidence that the
implementation of unimplemented operations will work in the
qemu, I don't know if these kernels actually have the necessary
code to emulate the instructions. Of course, if the compilers
have never generated the instructions it's unlikely that there
are any programs out there using them.

  target/sparc/cpu.h          |  8 ++++++++
  target/sparc/fop_helper.c   | 17 ++++++++++++++++-
  target/sparc/helper.h       |  4 ++++
  target/sparc/insns.decode   |  4 ++--
  target/sparc/int32_helper.c | 13 +++++++++++++
  target/sparc/translate.c    | 14 ++++++++++----
  6 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index dfd9512a21..ff48279a55 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -184,6 +184,8 @@ enum {
  #define FSR_FTT_SEQ_ERROR (4ULL << 14)
  #define FSR_FTT_INVAL_FPR (6ULL << 14)

+#define FSR_QNE    (1ULL << 13)
+
  #define FSR_FCC0_SHIFT    10
  #define FSR_FCC1_SHIFT    32
  #define FSR_FCC2_SHIFT    34
@@ -436,6 +438,12 @@ struct CPUArchState {
      /* FPU State Register, in parts */
      uint32_t fsr;                    /* rm, tem, aexc */
      uint32_t fsr_cexc_ftt;           /* cexc, ftt */
+
+    /* single-element FPU fault queue */
+    uint32_t fsr_qne;                /* qne */
+    uint32_t fsr_qi;                 /* faulting fp instruction */
+    target_ulong fsr_qa;             /* address of faulting instruction */
+
      uint32_t fcc[TARGET_FCCREGS];    /* fcc* */

      CPU_DoubleU fpr[TARGET_DPREGS];  /* floating point registers */
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 0b30665b51..0f74ca7908 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -21,6 +21,7 @@
  #include "cpu.h"
  #include "exec/exec-all.h"
  #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
  #include "fpu/softfloat.h"

  static inline float128 f128_in(Int128 i)
@@ -538,7 +539,7 @@ uint32_t helper_flcmpd(float64 src1, float64 src2)

  target_ulong cpu_get_fsr(CPUSPARCState *env)
  {
-    target_ulong fsr = env->fsr | env->fsr_cexc_ftt;
+    target_ulong fsr = env->fsr | env->fsr_cexc_ftt | env->fsr_qne;

      fsr |= env->fcc[0] << FSR_FCC0_SHIFT;
  #ifdef TARGET_SPARC64
@@ -563,6 +564,7 @@ static void set_fsr_nonsplit(CPUSPARCState *env, 
target_ulong fsr)
      int rnd_mode;

      env->fsr = fsr & (FSR_RD_MASK | FSR_TEM_MASK | FSR_AEXC_MASK);
+    env->fsr_qne = fsr & FSR_QNE;

      switch (fsr & FSR_RD_MASK) {
      case FSR_RD_NEAREST:
@@ -608,3 +610,16 @@ void helper_set_fsr_nofcc(CPUSPARCState *env, 
uint32_t fsr)
      env->fsr_cexc_ftt = fsr & (FSR_CEXC_MASK | FSR_FTT_MASK);
      set_fsr_nonsplit(env, fsr);
  }
+
+#ifndef TARGET_SPARC64
+void helper_store_fp_queue(CPUSPARCState *env, uint32_t addr)
+{
+    if (!env->fsr_qne) {
+        env->fsr_cexc_ftt = FSR_FTT_SEQ_ERROR;
+        cpu_raise_exception_ra(env, TT_FP_EXCP, GETPC());
+    }
+    cpu_stl_be_data(env, addr, env->fsr_qa);
+    cpu_stl_be_data(env, addr + 4, env->fsr_qi);
+    env->fsr_qne = 0;
+}
+#endif
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 134e519a37..15ae94d0f3 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -85,6 +85,10 @@ DEF_HELPER_FLAGS_2(fitoq, TCG_CALL_NO_WG, i128, env, s32)

  DEF_HELPER_FLAGS_2(fitos, TCG_CALL_NO_WG, f32, env, s32)

+#ifndef TARGET_SPARC64
+DEF_HELPER_2(store_fp_queue, void, env, i32)
+#endif
+
  #ifdef TARGET_SPARC64
  DEF_HELPER_FLAGS_2(fxtos, TCG_CALL_NO_WG, f32, env, s64)
  DEF_HELPER_FLAGS_2(fxtod, TCG_CALL_NO_WG, f64, env, s64)
diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode
index fbcb4f7aef..cc59dcf8d9 100644
--- a/target/sparc/insns.decode
+++ b/target/sparc/insns.decode
@@ -644,8 +644,8 @@ STF         11 ..... 100100 ..... . ............. 
    @r_r_ri_na
  STFSR       11 00000 100101 ..... . .............          @n_r_ri
  STXFSR      11 00001 100101 ..... . .............          @n_r_ri
  {
-  STQF      11 ..... 100110 ..... . .............          @q_r_ri_na
-  STDFQ     11 ----- 100110 ----- - -------------
+  STQF      11 ..... 100110 ..... . .............          @q_r_ri_na 
## SPARC-V9
+  STDFQ     11 ..... 100110 ..... . .............          @r_r_r_asi 
## SPARC-V7-8
  }
  STDF        11 ..... 100111 ..... . .............          @d_r_ri_na

diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c
index 6b7d65b031..f18cd97059 100644
--- a/target/sparc/int32_helper.c
+++ b/target/sparc/int32_helper.c
@@ -22,6 +22,7 @@
  #include "cpu.h"
  #include "trace.h"
  #include "exec/log.h"
+#include "exec/cpu_ldst.h"
  #include "sysemu/runstate.h"


@@ -147,6 +148,18 @@ void sparc_cpu_do_interrupt(CPUState *cs)
      env->psret = 0;
      cwp = cpu_cwp_dec(env, env->cwp - 1);
      cpu_set_cwp(env, cwp);
+    if (intno == TT_FP_EXCP) {
+        env->fsr_qne = FSR_QNE;
+        env->fsr_qa = env->pc;
+        env->fsr_qi = cpu_ldl_code(env, env->fsr_qa);
+         /*
+          * Because of the asynchronous FPU on real Sparc 32 bit
+          * machines, the pc and npc will have already been advanced
+          * by the time that the trap is taken.
+          */
+        env->pc = env->npc;
+        env->npc = env->npc + 4;
+    }
      env->regwptr[9] = env->pc;
      env->regwptr[10] = env->npc;
      env->psrps = env->psrs;
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 113639083b..2160ccb5b9 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -41,6 +41,7 @@
  # define gen_helper_rett(E)                     qemu_build_not_reached()
  # define gen_helper_power_down(E)               qemu_build_not_reached()
  # define gen_helper_wrpsr(E, S)                 qemu_build_not_reached()
+# define gen_helper_store_fp_queue(E, A)        qemu_build_not_reached()
  #else
  # define gen_helper_clear_softint(E, S)         qemu_build_not_reached()
  # define gen_helper_done(E)                     qemu_build_not_reached()
@@ -4521,13 +4522,13 @@ static bool do_st_fpr(DisasContext *dc, 
arg_r_r_ri_asi *a, MemOp sz)

  TRANS(STF, ALL, do_st_fpr, a, MO_32)
  TRANS(STDF, ALL, do_st_fpr, a, MO_64)
-TRANS(STQF, ALL, do_st_fpr, a, MO_128)
+TRANS(STQF, 64, do_st_fpr, a, MO_128)

  TRANS(STFA, 64, do_st_fpr, a, MO_32)
  TRANS(STDFA, 64, do_st_fpr, a, MO_64)
  TRANS(STQFA, 64, do_st_fpr, a, MO_128)

-static bool trans_STDFQ(DisasContext *dc, arg_STDFQ *a)
+static bool trans_STDFQ(DisasContext *dc, arg_r_r_ri_asi *a)
  {
      if (!avail_32(dc)) {
          return false;
@@ -4538,10 +4539,15 @@ static bool trans_STDFQ(DisasContext *dc, 
arg_STDFQ *a)
      if (gen_trap_ifnofpu(dc)) {
          return true;
      }
-    gen_op_fpexception_im(dc, FSR_FTT_SEQ_ERROR);
-    return true;
+    TCGv store_addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
+    if (store_addr == NULL) {
+        return false;
+    }
+    gen_helper_store_fp_queue(tcg_env, store_addr);
+    return advance_pc(dc);
  }

+
  static bool trans_LDFSR(DisasContext *dc, arg_r_r_ri *a)
  {
      TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
-- 
2.34.1