[PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN

Richard Henderson posted 1 patch 1 year, 8 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20220901101509.145758-1-richard.henderson@linaro.org
Maintainers: Laurent Vivier <laurent@vivier.eu>, Yoshinori Sato <ysato@users.sourceforge.jp>
target/sh4/cpu.h        | 56 +++++++++++++------------
linux-user/sh4/signal.c |  6 +--
target/sh4/cpu.c        |  6 +--
target/sh4/helper.c     |  6 +--
target/sh4/translate.c  | 90 ++++++++++++++++++++++-------------------
5 files changed, 88 insertions(+), 76 deletions(-)
[PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Richard Henderson 1 year, 8 months ago
The value previously chosen overlaps GUSA_MASK.

Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
that they are included in TB_FLAGs.  Add aliases for the
FPSCR and SR bits that are included in TB_FLAGS, so that
we don't accidentally reassign those bits.

Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/sh4/cpu.h        | 56 +++++++++++++------------
 linux-user/sh4/signal.c |  6 +--
 target/sh4/cpu.c        |  6 +--
 target/sh4/helper.c     |  6 +--
 target/sh4/translate.c  | 90 ++++++++++++++++++++++-------------------
 5 files changed, 88 insertions(+), 76 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 9f15ef913c..727b829598 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -78,26 +78,33 @@
 #define FPSCR_RM_NEAREST       (0 << 0)
 #define FPSCR_RM_ZERO          (1 << 0)
 
-#define DELAY_SLOT_MASK        0x7
-#define DELAY_SLOT             (1 << 0)
-#define DELAY_SLOT_CONDITIONAL (1 << 1)
-#define DELAY_SLOT_RTE         (1 << 2)
+#define TB_FLAG_DELAY_SLOT       (1 << 0)
+#define TB_FLAG_DELAY_SLOT_COND  (1 << 1)
+#define TB_FLAG_DELAY_SLOT_RTE   (1 << 2)
+#define TB_FLAG_PENDING_MOVCA    (1 << 3)
+#define TB_FLAG_GUSA_SHIFT       4                      /* [11:4] */
+#define TB_FLAG_GUSA_EXCLUSIVE   (1 << 12)
+#define TB_FLAG_UNALIGN          (1 << 13)
+#define TB_FLAG_SR_FD            (1 << SR_FD)           /* 15 */
+#define TB_FLAG_FPSCR_PR         FPSCR_PR               /* 19 */
+#define TB_FLAG_FPSCR_SZ         FPSCR_SZ               /* 20 */
+#define TB_FLAG_FPSCR_FR         FPSCR_FR               /* 21 */
+#define TB_FLAG_SR_RB            (1 << SR_RB)           /* 29 */
+#define TB_FLAG_SR_MD            (1 << SR_MD)           /* 30 */
 
-#define TB_FLAG_PENDING_MOVCA  (1 << 3)
-#define TB_FLAG_UNALIGN        (1 << 4)
-
-#define GUSA_SHIFT             4
-#ifdef CONFIG_USER_ONLY
-#define GUSA_EXCLUSIVE         (1 << 12)
-#define GUSA_MASK              ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
-#else
-/* Provide dummy versions of the above to allow tests against tbflags
-   to be elided while avoiding ifdefs.  */
-#define GUSA_EXCLUSIVE         0
-#define GUSA_MASK              0
-#endif
-
-#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
+#define TB_FLAG_DELAY_SLOT_MASK  (TB_FLAG_DELAY_SLOT |       \
+                                  TB_FLAG_DELAY_SLOT_COND |  \
+                                  TB_FLAG_DELAY_SLOT_RTE)
+#define TB_FLAG_GUSA_MASK        ((0xff << TB_FLAG_GUSA_SHIFT) | \
+                                  TB_FLAG_GUSA_EXCLUSIVE)
+#define TB_FLAG_FPSCR_MASK       (TB_FLAG_FPSCR_PR | \
+                                  TB_FLAG_FPSCR_SZ | \
+                                  TB_FLAG_FPSCR_FR)
+#define TB_FLAG_SR_MASK          (TB_FLAG_SR_FD | \
+                                  TB_FLAG_SR_RB | \
+                                  TB_FLAG_SR_MD)
+#define TB_FLAG_ENVFLAGS_MASK    (TB_FLAG_DELAY_SLOT_MASK | \
+                                  TB_FLAG_GUSA_MASK)
 
 typedef struct tlb_t {
     uint32_t vpn;		/* virtual page number */
@@ -258,7 +265,7 @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
 {
     /* The instruction in a RTE delay slot is fetched in privileged
        mode, but executed in user mode.  */
-    if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
+    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
         return 0;
     } else {
         return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
@@ -366,11 +373,10 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
 {
     *pc = env->pc;
     /* For a gUSA region, notice the end of the region.  */
-    *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
-    *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
-            | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
-            | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
-            | (env->sr & (1u << SR_FD))                        /* Bit 15 */
+    *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
+    *flags = env->flags
+            | (env->fpscr & TB_FLAG_FPSCR_MASK)
+            | (env->sr & TB_FLAG_SR_MASK)
             | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
 #ifdef CONFIG_USER_ONLY
     *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index f6a18bc6b5..c4ba962708 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -161,7 +161,7 @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
     __get_user(regs->fpul, &sc->sc_fpul);
 
     regs->tra = -1;         /* disable syscall checks */
-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
+    regs->flags = 0;
 }
 
 void setup_frame(int sig, struct target_sigaction *ka,
@@ -199,7 +199,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
     regs->gregs[5] = 0;
     regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
     regs->pc = (unsigned long) ka->_sa_handler;
-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
+    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
 
     unlock_user_struct(frame, frame_addr, 1);
     return;
@@ -251,7 +251,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
     regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
     regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
     regs->pc = (unsigned long) ka->_sa_handler;
-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
+    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
 
     unlock_user_struct(frame, frame_addr, 1);
     return;
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 06b2691dc4..65643b6b1c 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -40,7 +40,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
     SuperHCPU *cpu = SUPERH_CPU(cs);
 
     cpu->env.pc = tb->pc;
-    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
+    cpu->env.flags = tb->flags;
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -50,10 +50,10 @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
     SuperHCPU *cpu = SUPERH_CPU(cs);
     CPUSH4State *env = &cpu->env;
 
-    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+    if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
         && env->pc != tb->pc) {
         env->pc -= 2;
-        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+        env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
         return true;
     }
     return false;
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 6a620e36fc..e02e7af607 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -147,11 +147,11 @@ void superh_cpu_do_interrupt(CPUState *cs)
     env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
     env->lock_addr = -1;
 
-    if (env->flags & DELAY_SLOT_MASK) {
+    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
         /* Branch instruction should be executed again before delay slot. */
 	env->spc -= 2;
 	/* Clear flags for exception/interrupt routine. */
-        env->flags &= ~DELAY_SLOT_MASK;
+        env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
     }
 
     if (do_exp) {
@@ -786,7 +786,7 @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
         CPUSH4State *env = &cpu->env;
 
         /* Delay slots are indivisible, ignore interrupts */
-        if (env->flags & DELAY_SLOT_MASK) {
+        if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
             return false;
         } else {
             superh_cpu_do_interrupt(cs);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index f1b190e7cf..68d539c7f6 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -175,13 +175,13 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 		    i, env->gregs[i], i + 1, env->gregs[i + 1],
 		    i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
     }
-    if (env->flags & DELAY_SLOT) {
+    if (env->flags & TB_FLAG_DELAY_SLOT) {
         qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
 		    env->delayed_pc);
-    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
+    } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
         qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
 		    env->delayed_pc);
-    } else if (env->flags & DELAY_SLOT_RTE) {
+    } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
         qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
                      env->delayed_pc);
     }
@@ -223,7 +223,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
 
 static inline bool use_exit_tb(DisasContext *ctx)
 {
-    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
+    return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
 }
 
 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -276,12 +276,12 @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
     TCGLabel *l1 = gen_new_label();
     TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
 
-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* When in an exclusive region, we must continue to the end.
            Therefore, exit the region on a taken branch, but otherwise
            fall through to the next instruction.  */
         tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
-        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
         /* Note that this won't actually use a goto_tb opcode because we
            disallow it in use_goto_tb, but it handles exit + singlestep.  */
         gen_goto_tb(ctx, 0, dest);
@@ -307,14 +307,14 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     tcg_gen_mov_i32(ds, cpu_delayed_cond);
     tcg_gen_discard_i32(cpu_delayed_cond);
 
-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* When in an exclusive region, we must continue to the end.
            Therefore, exit the region on a taken branch, but otherwise
            fall through to the next instruction.  */
         tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
 
         /* Leave the gUSA region.  */
-        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
         gen_jump(ctx);
 
         gen_set_label(l1);
@@ -361,8 +361,8 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
 
 #define CHECK_NOT_DELAY_SLOT \
-    if (ctx->envflags & DELAY_SLOT_MASK) {  \
-        goto do_illegal_slot;               \
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
+        goto do_illegal_slot;                       \
     }
 
 #define CHECK_PRIVILEGED \
@@ -436,7 +436,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0x000b:		/* rts */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0028:		/* clrmac */
@@ -458,7 +458,7 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         gen_write_sr(cpu_ssr);
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
-        ctx->envflags |= DELAY_SLOT_RTE;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
 	ctx->delayed_pc = (uint32_t) - 1;
         ctx->base.is_jmp = DISAS_STOP;
 	return;
@@ -513,12 +513,15 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xe000:		/* mov #imm,Rn */
 #ifdef CONFIG_USER_ONLY
-        /* Detect the start of a gUSA region.  If so, update envflags
-           and end the TB.  This will allow us to see the end of the
-           region (stored in R0) in the next TB.  */
+        /*
+         * Detect the start of a gUSA region (mov #-n, r15).
+         * If so, update envflags and end the TB.  This will allow us
+         * to see the end of the region (stored in R0) in the next TB.
+         */
         if (B11_8 == 15 && B7_0s < 0 &&
             (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
-            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
+            ctx->envflags =
+                deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
             ctx->base.is_jmp = DISAS_STOP;
         }
 #endif
@@ -544,13 +547,13 @@ static void _decode_opc(DisasContext * ctx)
     case 0xa000:		/* bra disp */
 	CHECK_NOT_DELAY_SLOT
         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	return;
     case 0xb000:		/* bsr disp */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	return;
     }
 
@@ -1194,7 +1197,7 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
-        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
 	return;
     case 0x8900:		/* bt label */
 	CHECK_NOT_DELAY_SLOT
@@ -1204,7 +1207,7 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
-        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
@@ -1388,14 +1391,14 @@ static void _decode_opc(DisasContext * ctx)
     case 0x0023:		/* braf Rn */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0003:		/* bsrf Rn */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
 	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x4015:		/* cmp/pl Rn */
@@ -1411,14 +1414,14 @@ static void _decode_opc(DisasContext * ctx)
     case 0x402b:		/* jmp @Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400b:		/* jsr @Rn */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400e:		/* ldc Rm,SR */
@@ -1839,7 +1842,7 @@ static void _decode_opc(DisasContext * ctx)
     fflush(stderr);
 #endif
  do_illegal:
-    if (ctx->envflags & DELAY_SLOT_MASK) {
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
  do_illegal_slot:
         gen_save_cpu_state(ctx, true);
         gen_helper_raise_slot_illegal_instruction(cpu_env);
@@ -1852,7 +1855,7 @@ static void _decode_opc(DisasContext * ctx)
 
  do_fpu_disabled:
     gen_save_cpu_state(ctx, true);
-    if (ctx->envflags & DELAY_SLOT_MASK) {
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
         gen_helper_raise_slot_fpu_disable(cpu_env);
     } else {
         gen_helper_raise_fpu_disable(cpu_env);
@@ -1867,23 +1870,23 @@ static void decode_opc(DisasContext * ctx)
 
     _decode_opc(ctx);
 
-    if (old_flags & DELAY_SLOT_MASK) {
+    if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
         /* go out of the delay slot */
-        ctx->envflags &= ~DELAY_SLOT_MASK;
+        ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
 
         /* When in an exclusive region, we must continue to the end
            for conditional branches.  */
-        if (ctx->tbflags & GUSA_EXCLUSIVE
-            && old_flags & DELAY_SLOT_CONDITIONAL) {
+        if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
+            && old_flags & TB_FLAG_DELAY_SLOT_COND) {
             gen_delayed_conditional_jump(ctx);
             return;
         }
         /* Otherwise this is probably an invalid gUSA region.
            Drop the GUSA bits so the next TB doesn't see them.  */
-        ctx->envflags &= ~GUSA_MASK;
+        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
 
         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
-        if (old_flags & DELAY_SLOT_CONDITIONAL) {
+        if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
 	    gen_delayed_conditional_jump(ctx);
         } else {
             gen_jump(ctx);
@@ -2223,7 +2226,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
     }
 
     /* The entire region has been translated.  */
-    ctx->envflags &= ~GUSA_MASK;
+    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
     ctx->base.pc_next = pc_end;
     ctx->base.num_insns += max_insns - 1;
     return;
@@ -2234,7 +2237,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
 
     /* Restart with the EXCLUSIVE bit set, within a TB run via
        cpu_exec_step_atomic holding the exclusive lock.  */
-    ctx->envflags |= GUSA_EXCLUSIVE;
+    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
     gen_save_cpu_state(ctx, false);
     gen_helper_exclusive(cpu_env);
     ctx->base.is_jmp = DISAS_NORETURN;
@@ -2267,17 +2270,19 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
                   (tbflags & (1 << SR_RB))) * 0x10;
     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
 
-    if (tbflags & GUSA_MASK) {
+#ifdef CONFIG_USER_ONLY
+    if (tbflags & TB_FLAG_GUSA_MASK) {
+        /* In gUSA exclusive region. */
         uint32_t pc = ctx->base.pc_next;
         uint32_t pc_end = ctx->base.tb->cs_base;
-        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
+        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
         int max_insns = (pc_end - pc) / 2;
 
         if (pc != pc_end + backup || max_insns < 2) {
             /* This is a malformed gUSA region.  Don't do anything special,
                since the interpreter is likely to get confused.  */
-            ctx->envflags &= ~GUSA_MASK;
-        } else if (tbflags & GUSA_EXCLUSIVE) {
+            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
+        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
             /* Regardless of single-stepping or the end of the page,
                we must complete execution of the gUSA region while
                holding the exclusive lock.  */
@@ -2285,6 +2290,7 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
             return;
         }
     }
+#endif
 
     /* Since the ISA is fixed-width, we can bound by the number
        of instructions remaining on the page.  */
@@ -2309,8 +2315,8 @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
 
 #ifdef CONFIG_USER_ONLY
-    if (unlikely(ctx->envflags & GUSA_MASK)
-        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
+    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
+        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
         /* We're in an gUSA region, and we have not already fallen
            back on using an exclusive region.  Attempt to parse the
            region into a single supported atomic operation.  Failure
@@ -2330,9 +2336,9 @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
 
-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* Ending the region of exclusivity.  Clear the bits.  */
-        ctx->envflags &= ~GUSA_MASK;
+        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
     }
 
     switch (ctx->base.is_jmp) {
-- 
2.34.1
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Guenter Roeck 1 year, 5 months ago
Hi,

On Thu, Sep 01, 2022 at 11:15:09AM +0100, Richard Henderson wrote:
> The value previously chosen overlaps GUSA_MASK.
> 
> Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
> that they are included in TB_FLAGs.  Add aliases for the
> FPSCR and SR bits that are included in TB_FLAGS, so that
> we don't accidentally reassign those bits.
> 
> Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

I noticed that my sh4 emulations crash randomly with qemu v7.2-rc4.
This happens with all Linux kernel versions. Testing shows that this
patch is responsible. Reverting it fixes the problem.

Some of the symptoms are attached below.

Guenter

---
Symptoms:

- Random crashes, such as

Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
CPU: 0 PID: 1 Comm: init Not tainted 5.10.158 #1
Stack: (0x8c821e60 to 0x8c822000)
1e60: 8c436726 00000000 8c5db1fc 8c011a64 8ca7aa80 8c821e9c ab2577ac 8c021fca 
1e80: 8c011a64 8c81dde0 00020000 8c81dda0 00000000 0000000b 8c81f8e0 0000000b 
1ea0: 8c81f8e0 00000001 00000000 8c81fb9c 00000000 8c821eb0 8c821f5c 8c821fa4 
1ec0: 8c81fa5c 8c81fc1c 000000cd 00000000 00000000 00000000 ab2577ac 8c022af8 
1ee0: 8c81dda0 8c81dde0 00020000 8c821f5c 8c81dde0 8c81dda0 0000000b 8c02b1e8 
1f00: 8c821f5c 400004d8 8c821f48 8c011a64 0000000a 0000000a 8c81ca60 8c012db4 
1f20: 29558c9c 00000406 295f9294 8c821fe4 8c57702c 8c821fa4 09000002 8c821f68 
1f40: 8c011a64 295f9294 8c02b0d2 29558c9c 00000406 8c57702c 0000000b 0000000b 
1f60: 00000000 00000001 00000008 00000000 00000000 00000000 00000000 00000000 
1f80: ab2577ac 8c0150f8 29558c9c 00000406 295f9294 00000000 40008000 8c0150ec 
1fa0: 8c820000 7bfcfadc ffffffff 00000040 000080f0 cfffffff 00000000 00000000 
1fc0: 8c820000 295fae80 0d39ad3d 295fae80 295630ee 295f9294 00000406 29558c9c 
1fe0: 7bfcfadc 295af5ac 295af6ea 00008100 295fafbc 00000000 0d39acf0 ffffffff 

Call trace:
 [<8c436d88>] printk+0x0/0x48
 [<8c011a64>] arch_local_irq_restore+0x0/0x24
 [<8c021fca>] do_exit+0x8a6/0x8f0
 [<8c011a64>] arch_local_irq_restore+0x0/0x24
 [<8c022af8>] do_group_exit+0x34/0x90
 [<8c02b1e8>] get_signal+0xd8/0x5f8
 [<8c011a64>] arch_local_irq_restore+0x0/0x24
 [<8c012db4>] do_notify_resume+0x6c/0x54c
 [<8c011a64>] arch_local_irq_restore+0x0/0x24
 [<8c02b0d2>] force_sig_fault_to_task+0x3a/0x6c
 [<8c0150f8>] resume_userspace+0x0/0x10
 [<8c0150ec>] ret_from_exception+0x0/0xc

Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b^M
^M
CPU: 0 PID: 1 Comm: init Not tainted 4.14.301 #1^M
Stack: (0x8fc19e08 to 0x8fc1a000)^M
...

- Alleged FPU use

BUG: FPU is used in kernel mode.
------------[ cut here ]------------
kernel BUG at arch/sh/kernel/cpu/fpu.c:60!
Kernel BUG: 003e [#1]
Modules linked in:

CPU: 0 PID: 1166 Comm: sh Not tainted 4.14.301-rc2-00084-gdd6fc0ede260 #1
task: 8ff38800 task.stack: 8f40e000
PC is at fpu_state_restore+0x60/0x88
PR is at fpu_state_restore+0x60/0x88
PC  : 8c01969c SP  : 8fc2be6c SR  : 400080f1
TEA : 004382e8
R0  : 00000020 R1  : 8c4f21a4 R2  : 8c4f21a4 R3  : 8c011be8
R4  : 000000f0 R5  : 00000000 R6  : 00000023 R7  : 8c1b97e0
R8  : 8fc2bec0 R9  : 8ff38800 R10 : 8c0196c4 R11 : 00000000
R12 : 8c011be0 R13 : 8ff38800 R14 : 8f40fe24
MACH: 000003de MACL: 00000184 GBR : 295fafbc PR  : 8c01969c

Call trace:
 [<8c0196d0>] fpu_state_restore_trap_handler+0xc/0x18
 [<8c0196c4>] fpu_state_restore_trap_handler+0x0/0x18
 [<8c0150ec>] ret_from_exception+0x0/0xc
 [<8c0150ec>] ret_from_exception+0x0/0xc
 [<8c3cb1dc>] __schedule+0x1bc/0x50c
 [<8c011be0>] arch_local_save_flags+0x0/0x8
 [<8c017016>] save_fpu+0x16/0x80
 [<8c011fd6>] __switch_to+0x5a/0x8c
 [<8c3cb1dc>] __schedule+0x1bc/0x50c
 [<8c011be0>] arch_local_save_flags+0x0/0x8
 ...

- Alleged unhandled unaligned access errors in different locations
  (varies per run)

Fixing up unaligned userspace access in "S40network" pid=1111 pc=0x0043761e ins=0x112d
Fixing up unaligned userspace access in "S40network" pid=1111 pc=0x0043761e ins=0x112d
Sending SIGBUS to "S40network" due to unaligned access (PC 43761e PR 295b6796)
Bus error

Fixing up unaligned userspace access in "sh" pid=1122 pc=0x295b1714 ins=0x1123
Fixing up unaligned userspace access in "sh" pid=1122 pc=0x295b1714 ins=0x1123
Sending SIGBUS to "sh" due to unaligned access (PC 295b1714 PR 295b170c)

Fixing up unaligned userspace access in "klogd" pid=1084 pc=0x295ac464 ins=0x2922
Fixing up unaligned userspace access in "klogd" pid=1084 pc=0x295ac464 ins=0x2922
Sending SIGBUS to "klogd" due to unaligned access (PC 295ac464 PR 295ac45c)
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Guenter Roeck 1 year, 5 months ago
On Sat, Dec 10, 2022 at 07:27:46AM -0800, Guenter Roeck wrote:
> Hi,
> 
> On Thu, Sep 01, 2022 at 11:15:09AM +0100, Richard Henderson wrote:
> > The value previously chosen overlaps GUSA_MASK.
> > 
> > Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
> > that they are included in TB_FLAGs.  Add aliases for the
> > FPSCR and SR bits that are included in TB_FLAGS, so that
> > we don't accidentally reassign those bits.
> > 
> > Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> 
> I noticed that my sh4 emulations crash randomly with qemu v7.2-rc4.
> This happens with all Linux kernel versions. Testing shows that this
> patch is responsible. Reverting it fixes the problem.
> 

The patch below fixes the problem for me.

Guenter

---
From d488bcad383f360e522dbffe0d21f8ad39d33c61 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 11 Dec 2022 14:14:47 -0800
Subject: [PATCH] target/sh4: Fix unaligned handling

Commit ab419fd8a035 ("target/sh4: Fix TB_FLAG_UNALIGN") made a number
of changes which seemed unrelated to the problem being fixed. In addition
to updating updating masks, it eliminated various mask operations.
This results in a number of inexplicable crashes, often associated
with alleged unaligned operations.

Improve alignment with the original code. Reintroduce mask operations,
and undo an added '#ifdef CONFIG_USER_ONLY'.

While I have no real idea what I am doing, the resulting code no longer
crashes, so it must do some good.

Fixes: ab419fd8a035 ("target/sh4: Fix TB_FLAG_UNALIGN")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 linux-user/sh4/signal.c | 2 +-
 target/sh4/cpu.c        | 2 +-
 target/sh4/translate.c  | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index c4ba962708..2135e2b881 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -161,7 +161,7 @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
     __get_user(regs->fpul, &sc->sc_fpul);
 
     regs->tra = -1;         /* disable syscall checks */
-    regs->flags = 0;
+    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
 }
 
 void setup_frame(int sig, struct target_sigaction *ka,
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 453268392b..827cee25af 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -47,7 +47,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
     SuperHCPU *cpu = SUPERH_CPU(cs);
 
     cpu->env.pc = tb_pc(tb);
-    cpu->env.flags = tb->flags;
+    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
 }
 
 static void superh_restore_state_to_opc(CPUState *cs,
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 7db3468b01..546c182463 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -2270,7 +2270,6 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
                   (tbflags & (1 << SR_RB))) * 0x10;
     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
 
-#ifdef CONFIG_USER_ONLY
     if (tbflags & TB_FLAG_GUSA_MASK) {
         /* In gUSA exclusive region. */
         uint32_t pc = ctx->base.pc_next;
@@ -2290,7 +2289,6 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
             return;
         }
     }
-#endif
 
     /* Since the ISA is fixed-width, we can bound by the number
        of instructions remaining on the page.  */
-- 
2.36.2
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Richard Henderson 1 year, 5 months ago
On 12/11/22 19:13, Guenter Roeck wrote:
> On Sat, Dec 10, 2022 at 07:27:46AM -0800, Guenter Roeck wrote:
>> Hi,
>>
>> On Thu, Sep 01, 2022 at 11:15:09AM +0100, Richard Henderson wrote:
>>> The value previously chosen overlaps GUSA_MASK.
>>>
>>> Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
>>> that they are included in TB_FLAGs.  Add aliases for the
>>> FPSCR and SR bits that are included in TB_FLAGS, so that
>>> we don't accidentally reassign those bits.
>>>
>>> Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>
>> I noticed that my sh4 emulations crash randomly with qemu v7.2-rc4.
>> This happens with all Linux kernel versions. Testing shows that this
>> patch is responsible. Reverting it fixes the problem.
>>
> 
> The patch below fixes the problem for me.

Thanks for the investigation.


> +++ b/target/sh4/cpu.c
> @@ -47,7 +47,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
>       SuperHCPU *cpu = SUPERH_CPU(cs);
>   
>       cpu->env.pc = tb_pc(tb);
> -    cpu->env.flags = tb->flags;
> +    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;

Only this hunk should be necessary.



>   }
>   
>   static void superh_restore_state_to_opc(CPUState *cs,
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 7db3468b01..546c182463 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -2270,7 +2270,6 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>                     (tbflags & (1 << SR_RB))) * 0x10;
>       ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
>   
> -#ifdef CONFIG_USER_ONLY
>       if (tbflags & TB_FLAG_GUSA_MASK) {
>           /* In gUSA exclusive region. */
>           uint32_t pc = ctx->base.pc_next;
> @@ -2290,7 +2289,6 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>               return;
>           }
>       }
> -#endif

This one is actively wrong.


r~
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Guenter Roeck 1 year, 5 months ago
On Mon, Dec 12, 2022 at 08:30:42AM -0600, Richard Henderson wrote:
> On 12/11/22 19:13, Guenter Roeck wrote:
> > On Sat, Dec 10, 2022 at 07:27:46AM -0800, Guenter Roeck wrote:
> > > Hi,
> > > 
> > > On Thu, Sep 01, 2022 at 11:15:09AM +0100, Richard Henderson wrote:
> > > > The value previously chosen overlaps GUSA_MASK.
> > > > 
> > > > Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
> > > > that they are included in TB_FLAGs.  Add aliases for the
> > > > FPSCR and SR bits that are included in TB_FLAGS, so that
> > > > we don't accidentally reassign those bits.
> > > > 
> > > > Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
> > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
> > > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > > 
> > > I noticed that my sh4 emulations crash randomly with qemu v7.2-rc4.
> > > This happens with all Linux kernel versions. Testing shows that this
> > > patch is responsible. Reverting it fixes the problem.
> > > 
> > 
> > The patch below fixes the problem for me.
> 
> Thanks for the investigation.
> 
> 
> > +++ b/target/sh4/cpu.c
> > @@ -47,7 +47,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
> >       SuperHCPU *cpu = SUPERH_CPU(cs);
> >       cpu->env.pc = tb_pc(tb);
> > -    cpu->env.flags = tb->flags;
> > +    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
> 
> Only this hunk should be necessary.
> 

Confirmed.

Do you plan to send a formal patch, or do you want me to do it ?

Thanks,
Guenter
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Richard Henderson 1 year, 5 months ago
On 12/12/22 22:55, Guenter Roeck wrote:
> Do you plan to send a formal patch, or do you want me to do it ?

I can send a patch.


r~
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Guenter Roeck 1 year, 5 months ago
On 12/12/22 06:30, Richard Henderson wrote:
> On 12/11/22 19:13, Guenter Roeck wrote:
>> On Sat, Dec 10, 2022 at 07:27:46AM -0800, Guenter Roeck wrote:
>>> Hi,
>>>
>>> On Thu, Sep 01, 2022 at 11:15:09AM +0100, Richard Henderson wrote:
>>>> The value previously chosen overlaps GUSA_MASK.
>>>>
>>>> Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
>>>> that they are included in TB_FLAGs.  Add aliases for the
>>>> FPSCR and SR bits that are included in TB_FLAGS, so that
>>>> we don't accidentally reassign those bits.
>>>>
>>>> Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
>>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>>
>>> I noticed that my sh4 emulations crash randomly with qemu v7.2-rc4.
>>> This happens with all Linux kernel versions. Testing shows that this
>>> patch is responsible. Reverting it fixes the problem.
>>>
>>
>> The patch below fixes the problem for me.
> 
> Thanks for the investigation.
> 
> 
>> +++ b/target/sh4/cpu.c
>> @@ -47,7 +47,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
>>       SuperHCPU *cpu = SUPERH_CPU(cs);
>>       cpu->env.pc = tb_pc(tb);
>> -    cpu->env.flags = tb->flags;
>> +    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
> 
> Only this hunk should be necessary.
> 

I'll give it a try.

Thanks,
Guenter


Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Yoshinori Sato 1 year, 8 months ago
On Thu, 01 Sep 2022 19:15:09 +0900,
Richard Henderson wrote:
> 
> The value previously chosen overlaps GUSA_MASK.
> 
> Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
> that they are included in TB_FLAGs.  Add aliases for the
> FPSCR and SR bits that are included in TB_FLAGS, so that
> we don't accidentally reassign those bits.
> 
> Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/sh4/cpu.h        | 56 +++++++++++++------------
>  linux-user/sh4/signal.c |  6 +--
>  target/sh4/cpu.c        |  6 +--
>  target/sh4/helper.c     |  6 +--
>  target/sh4/translate.c  | 90 ++++++++++++++++++++++-------------------
>  5 files changed, 88 insertions(+), 76 deletions(-)
> 
> diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
> index 9f15ef913c..727b829598 100644
> --- a/target/sh4/cpu.h
> +++ b/target/sh4/cpu.h
> @@ -78,26 +78,33 @@
>  #define FPSCR_RM_NEAREST       (0 << 0)
>  #define FPSCR_RM_ZERO          (1 << 0)
>  
> -#define DELAY_SLOT_MASK        0x7
> -#define DELAY_SLOT             (1 << 0)
> -#define DELAY_SLOT_CONDITIONAL (1 << 1)
> -#define DELAY_SLOT_RTE         (1 << 2)
> +#define TB_FLAG_DELAY_SLOT       (1 << 0)
> +#define TB_FLAG_DELAY_SLOT_COND  (1 << 1)
> +#define TB_FLAG_DELAY_SLOT_RTE   (1 << 2)
> +#define TB_FLAG_PENDING_MOVCA    (1 << 3)
> +#define TB_FLAG_GUSA_SHIFT       4                      /* [11:4] */
> +#define TB_FLAG_GUSA_EXCLUSIVE   (1 << 12)
> +#define TB_FLAG_UNALIGN          (1 << 13)
> +#define TB_FLAG_SR_FD            (1 << SR_FD)           /* 15 */
> +#define TB_FLAG_FPSCR_PR         FPSCR_PR               /* 19 */
> +#define TB_FLAG_FPSCR_SZ         FPSCR_SZ               /* 20 */
> +#define TB_FLAG_FPSCR_FR         FPSCR_FR               /* 21 */
> +#define TB_FLAG_SR_RB            (1 << SR_RB)           /* 29 */
> +#define TB_FLAG_SR_MD            (1 << SR_MD)           /* 30 */
>  
> -#define TB_FLAG_PENDING_MOVCA  (1 << 3)
> -#define TB_FLAG_UNALIGN        (1 << 4)
> -
> -#define GUSA_SHIFT             4
> -#ifdef CONFIG_USER_ONLY
> -#define GUSA_EXCLUSIVE         (1 << 12)
> -#define GUSA_MASK              ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
> -#else
> -/* Provide dummy versions of the above to allow tests against tbflags
> -   to be elided while avoiding ifdefs.  */
> -#define GUSA_EXCLUSIVE         0
> -#define GUSA_MASK              0
> -#endif
> -
> -#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
> +#define TB_FLAG_DELAY_SLOT_MASK  (TB_FLAG_DELAY_SLOT |       \
> +                                  TB_FLAG_DELAY_SLOT_COND |  \
> +                                  TB_FLAG_DELAY_SLOT_RTE)
> +#define TB_FLAG_GUSA_MASK        ((0xff << TB_FLAG_GUSA_SHIFT) | \
> +                                  TB_FLAG_GUSA_EXCLUSIVE)
> +#define TB_FLAG_FPSCR_MASK       (TB_FLAG_FPSCR_PR | \
> +                                  TB_FLAG_FPSCR_SZ | \
> +                                  TB_FLAG_FPSCR_FR)
> +#define TB_FLAG_SR_MASK          (TB_FLAG_SR_FD | \
> +                                  TB_FLAG_SR_RB | \
> +                                  TB_FLAG_SR_MD)
> +#define TB_FLAG_ENVFLAGS_MASK    (TB_FLAG_DELAY_SLOT_MASK | \
> +                                  TB_FLAG_GUSA_MASK)
>  
>  typedef struct tlb_t {
>      uint32_t vpn;		/* virtual page number */
> @@ -258,7 +265,7 @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
>  {
>      /* The instruction in a RTE delay slot is fetched in privileged
>         mode, but executed in user mode.  */
> -    if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
> +    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
>          return 0;
>      } else {
>          return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
> @@ -366,11 +373,10 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
>  {
>      *pc = env->pc;
>      /* For a gUSA region, notice the end of the region.  */
> -    *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
> -    *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
> -            | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
> -            | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
> -            | (env->sr & (1u << SR_FD))                        /* Bit 15 */
> +    *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
> +    *flags = env->flags
> +            | (env->fpscr & TB_FLAG_FPSCR_MASK)
> +            | (env->sr & TB_FLAG_SR_MASK)
>              | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
>  #ifdef CONFIG_USER_ONLY
>      *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
> diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
> index f6a18bc6b5..c4ba962708 100644
> --- a/linux-user/sh4/signal.c
> +++ b/linux-user/sh4/signal.c
> @@ -161,7 +161,7 @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
>      __get_user(regs->fpul, &sc->sc_fpul);
>  
>      regs->tra = -1;         /* disable syscall checks */
> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
> +    regs->flags = 0;
>  }
>  
>  void setup_frame(int sig, struct target_sigaction *ka,
> @@ -199,7 +199,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
>      regs->gregs[5] = 0;
>      regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
>      regs->pc = (unsigned long) ka->_sa_handler;
> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
> +    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
>  
>      unlock_user_struct(frame, frame_addr, 1);
>      return;
> @@ -251,7 +251,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
>      regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
>      regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
>      regs->pc = (unsigned long) ka->_sa_handler;
> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
> +    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
>  
>      unlock_user_struct(frame, frame_addr, 1);
>      return;
> diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
> index 06b2691dc4..65643b6b1c 100644
> --- a/target/sh4/cpu.c
> +++ b/target/sh4/cpu.c
> @@ -40,7 +40,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
>      SuperHCPU *cpu = SUPERH_CPU(cs);
>  
>      cpu->env.pc = tb->pc;
> -    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
> +    cpu->env.flags = tb->flags;
>  }
>  
>  #ifndef CONFIG_USER_ONLY
> @@ -50,10 +50,10 @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
>      SuperHCPU *cpu = SUPERH_CPU(cs);
>      CPUSH4State *env = &cpu->env;
>  
> -    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
> +    if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
>          && env->pc != tb->pc) {
>          env->pc -= 2;
> -        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
> +        env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
>          return true;
>      }
>      return false;
> diff --git a/target/sh4/helper.c b/target/sh4/helper.c
> index 6a620e36fc..e02e7af607 100644
> --- a/target/sh4/helper.c
> +++ b/target/sh4/helper.c
> @@ -147,11 +147,11 @@ void superh_cpu_do_interrupt(CPUState *cs)
>      env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
>      env->lock_addr = -1;
>  
> -    if (env->flags & DELAY_SLOT_MASK) {
> +    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
>          /* Branch instruction should be executed again before delay slot. */
>  	env->spc -= 2;
>  	/* Clear flags for exception/interrupt routine. */
> -        env->flags &= ~DELAY_SLOT_MASK;
> +        env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
>      }
>  
>      if (do_exp) {
> @@ -786,7 +786,7 @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
>          CPUSH4State *env = &cpu->env;
>  
>          /* Delay slots are indivisible, ignore interrupts */
> -        if (env->flags & DELAY_SLOT_MASK) {
> +        if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
>              return false;
>          } else {
>              superh_cpu_do_interrupt(cs);
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index f1b190e7cf..68d539c7f6 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -175,13 +175,13 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
>  		    i, env->gregs[i], i + 1, env->gregs[i + 1],
>  		    i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
>      }
> -    if (env->flags & DELAY_SLOT) {
> +    if (env->flags & TB_FLAG_DELAY_SLOT) {
>          qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
>  		    env->delayed_pc);
> -    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
> +    } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
>          qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
>  		    env->delayed_pc);
> -    } else if (env->flags & DELAY_SLOT_RTE) {
> +    } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
>          qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
>                       env->delayed_pc);
>      }
> @@ -223,7 +223,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
>  
>  static inline bool use_exit_tb(DisasContext *ctx)
>  {
> -    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
> +    return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
>  }
>  
>  static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
> @@ -276,12 +276,12 @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
>      TCGLabel *l1 = gen_new_label();
>      TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
>  
> -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>          /* When in an exclusive region, we must continue to the end.
>             Therefore, exit the region on a taken branch, but otherwise
>             fall through to the next instruction.  */
>          tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
> -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
> +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
>          /* Note that this won't actually use a goto_tb opcode because we
>             disallow it in use_goto_tb, but it handles exit + singlestep.  */
>          gen_goto_tb(ctx, 0, dest);
> @@ -307,14 +307,14 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
>      tcg_gen_mov_i32(ds, cpu_delayed_cond);
>      tcg_gen_discard_i32(cpu_delayed_cond);
>  
> -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>          /* When in an exclusive region, we must continue to the end.
>             Therefore, exit the region on a taken branch, but otherwise
>             fall through to the next instruction.  */
>          tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
>  
>          /* Leave the gUSA region.  */
> -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
> +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
>          gen_jump(ctx);
>  
>          gen_set_label(l1);
> @@ -361,8 +361,8 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
>  #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
>  
>  #define CHECK_NOT_DELAY_SLOT \
> -    if (ctx->envflags & DELAY_SLOT_MASK) {  \
> -        goto do_illegal_slot;               \
> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
> +        goto do_illegal_slot;                       \
>      }
>  
>  #define CHECK_PRIVILEGED \
> @@ -436,7 +436,7 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x000b:		/* rts */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
> -        ctx->envflags |= DELAY_SLOT;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x0028:		/* clrmac */
> @@ -458,7 +458,7 @@ static void _decode_opc(DisasContext * ctx)
>  	CHECK_NOT_DELAY_SLOT
>          gen_write_sr(cpu_ssr);
>  	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
> -        ctx->envflags |= DELAY_SLOT_RTE;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
>  	ctx->delayed_pc = (uint32_t) - 1;
>          ctx->base.is_jmp = DISAS_STOP;
>  	return;
> @@ -513,12 +513,15 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xe000:		/* mov #imm,Rn */
>  #ifdef CONFIG_USER_ONLY
> -        /* Detect the start of a gUSA region.  If so, update envflags
> -           and end the TB.  This will allow us to see the end of the
> -           region (stored in R0) in the next TB.  */
> +        /*
> +         * Detect the start of a gUSA region (mov #-n, r15).
> +         * If so, update envflags and end the TB.  This will allow us
> +         * to see the end of the region (stored in R0) in the next TB.
> +         */
>          if (B11_8 == 15 && B7_0s < 0 &&
>              (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
> -            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
> +            ctx->envflags =
> +                deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
>              ctx->base.is_jmp = DISAS_STOP;
>          }
>  #endif
> @@ -544,13 +547,13 @@ static void _decode_opc(DisasContext * ctx)
>      case 0xa000:		/* bra disp */
>  	CHECK_NOT_DELAY_SLOT
>          ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
> -        ctx->envflags |= DELAY_SLOT;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>  	return;
>      case 0xb000:		/* bsr disp */
>  	CHECK_NOT_DELAY_SLOT
>          tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
>          ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
> -        ctx->envflags |= DELAY_SLOT;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>  	return;
>      }
>  
> @@ -1194,7 +1197,7 @@ static void _decode_opc(DisasContext * ctx)
>  	CHECK_NOT_DELAY_SLOT
>          tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
>          ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
> -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
>  	return;
>      case 0x8900:		/* bt label */
>  	CHECK_NOT_DELAY_SLOT
> @@ -1204,7 +1207,7 @@ static void _decode_opc(DisasContext * ctx)
>  	CHECK_NOT_DELAY_SLOT
>          tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
>          ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
> -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
>  	return;
>      case 0x8800:		/* cmp/eq #imm,R0 */
>          tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
> @@ -1388,14 +1391,14 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x0023:		/* braf Rn */
>  	CHECK_NOT_DELAY_SLOT
>          tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
> -        ctx->envflags |= DELAY_SLOT;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x0003:		/* bsrf Rn */
>  	CHECK_NOT_DELAY_SLOT
>          tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
>  	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
> -        ctx->envflags |= DELAY_SLOT;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x4015:		/* cmp/pl Rn */
> @@ -1411,14 +1414,14 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x402b:		/* jmp @Rn */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
> -        ctx->envflags |= DELAY_SLOT;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x400b:		/* jsr @Rn */
>  	CHECK_NOT_DELAY_SLOT
>          tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
>  	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
> -        ctx->envflags |= DELAY_SLOT;
> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x400e:		/* ldc Rm,SR */
> @@ -1839,7 +1842,7 @@ static void _decode_opc(DisasContext * ctx)
>      fflush(stderr);
>  #endif
>   do_illegal:
> -    if (ctx->envflags & DELAY_SLOT_MASK) {
> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
>   do_illegal_slot:
>          gen_save_cpu_state(ctx, true);
>          gen_helper_raise_slot_illegal_instruction(cpu_env);
> @@ -1852,7 +1855,7 @@ static void _decode_opc(DisasContext * ctx)
>  
>   do_fpu_disabled:
>      gen_save_cpu_state(ctx, true);
> -    if (ctx->envflags & DELAY_SLOT_MASK) {
> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
>          gen_helper_raise_slot_fpu_disable(cpu_env);
>      } else {
>          gen_helper_raise_fpu_disable(cpu_env);
> @@ -1867,23 +1870,23 @@ static void decode_opc(DisasContext * ctx)
>  
>      _decode_opc(ctx);
>  
> -    if (old_flags & DELAY_SLOT_MASK) {
> +    if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
>          /* go out of the delay slot */
> -        ctx->envflags &= ~DELAY_SLOT_MASK;
> +        ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
>  
>          /* When in an exclusive region, we must continue to the end
>             for conditional branches.  */
> -        if (ctx->tbflags & GUSA_EXCLUSIVE
> -            && old_flags & DELAY_SLOT_CONDITIONAL) {
> +        if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
> +            && old_flags & TB_FLAG_DELAY_SLOT_COND) {
>              gen_delayed_conditional_jump(ctx);
>              return;
>          }
>          /* Otherwise this is probably an invalid gUSA region.
>             Drop the GUSA bits so the next TB doesn't see them.  */
> -        ctx->envflags &= ~GUSA_MASK;
> +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
>  
>          tcg_gen_movi_i32(cpu_flags, ctx->envflags);
> -        if (old_flags & DELAY_SLOT_CONDITIONAL) {
> +        if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
>  	    gen_delayed_conditional_jump(ctx);
>          } else {
>              gen_jump(ctx);
> @@ -2223,7 +2226,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
>      }
>  
>      /* The entire region has been translated.  */
> -    ctx->envflags &= ~GUSA_MASK;
> +    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
>      ctx->base.pc_next = pc_end;
>      ctx->base.num_insns += max_insns - 1;
>      return;
> @@ -2234,7 +2237,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
>  
>      /* Restart with the EXCLUSIVE bit set, within a TB run via
>         cpu_exec_step_atomic holding the exclusive lock.  */
> -    ctx->envflags |= GUSA_EXCLUSIVE;
> +    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
>      gen_save_cpu_state(ctx, false);
>      gen_helper_exclusive(cpu_env);
>      ctx->base.is_jmp = DISAS_NORETURN;
> @@ -2267,17 +2270,19 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>                    (tbflags & (1 << SR_RB))) * 0x10;
>      ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
>  
> -    if (tbflags & GUSA_MASK) {
> +#ifdef CONFIG_USER_ONLY
> +    if (tbflags & TB_FLAG_GUSA_MASK) {
> +        /* In gUSA exclusive region. */
>          uint32_t pc = ctx->base.pc_next;
>          uint32_t pc_end = ctx->base.tb->cs_base;
> -        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
> +        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
>          int max_insns = (pc_end - pc) / 2;
>  
>          if (pc != pc_end + backup || max_insns < 2) {
>              /* This is a malformed gUSA region.  Don't do anything special,
>                 since the interpreter is likely to get confused.  */
> -            ctx->envflags &= ~GUSA_MASK;
> -        } else if (tbflags & GUSA_EXCLUSIVE) {
> +            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
> +        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>              /* Regardless of single-stepping or the end of the page,
>                 we must complete execution of the gUSA region while
>                 holding the exclusive lock.  */
> @@ -2285,6 +2290,7 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>              return;
>          }
>      }
> +#endif
>  
>      /* Since the ISA is fixed-width, we can bound by the number
>         of instructions remaining on the page.  */
> @@ -2309,8 +2315,8 @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
>      DisasContext *ctx = container_of(dcbase, DisasContext, base);
>  
>  #ifdef CONFIG_USER_ONLY
> -    if (unlikely(ctx->envflags & GUSA_MASK)
> -        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
> +    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
> +        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
>          /* We're in an gUSA region, and we have not already fallen
>             back on using an exclusive region.  Attempt to parse the
>             region into a single supported atomic operation.  Failure
> @@ -2330,9 +2336,9 @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>  {
>      DisasContext *ctx = container_of(dcbase, DisasContext, base);
>  
> -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>          /* Ending the region of exclusivity.  Clear the bits.  */
> -        ctx->envflags &= ~GUSA_MASK;
> +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
>      }
>  
>      switch (ctx->base.is_jmp) {
> -- 
> 2.34.1
> 

Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>

-- 
Yosinori Sato
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Richard Henderson 1 year, 7 months ago
Ping, or should I create a PR myself?

r~

On 9/1/22 07:15, Yoshinori Sato wrote:
> On Thu, 01 Sep 2022 19:15:09 +0900,
> Richard Henderson wrote:
>>
>> The value previously chosen overlaps GUSA_MASK.
>>
>> Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
>> that they are included in TB_FLAGs.  Add aliases for the
>> FPSCR and SR bits that are included in TB_FLAGS, so that
>> we don't accidentally reassign those bits.
>>
>> Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   target/sh4/cpu.h        | 56 +++++++++++++------------
>>   linux-user/sh4/signal.c |  6 +--
>>   target/sh4/cpu.c        |  6 +--
>>   target/sh4/helper.c     |  6 +--
>>   target/sh4/translate.c  | 90 ++++++++++++++++++++++-------------------
>>   5 files changed, 88 insertions(+), 76 deletions(-)
>>
>> diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
>> index 9f15ef913c..727b829598 100644
>> --- a/target/sh4/cpu.h
>> +++ b/target/sh4/cpu.h
>> @@ -78,26 +78,33 @@
>>   #define FPSCR_RM_NEAREST       (0 << 0)
>>   #define FPSCR_RM_ZERO          (1 << 0)
>>   
>> -#define DELAY_SLOT_MASK        0x7
>> -#define DELAY_SLOT             (1 << 0)
>> -#define DELAY_SLOT_CONDITIONAL (1 << 1)
>> -#define DELAY_SLOT_RTE         (1 << 2)
>> +#define TB_FLAG_DELAY_SLOT       (1 << 0)
>> +#define TB_FLAG_DELAY_SLOT_COND  (1 << 1)
>> +#define TB_FLAG_DELAY_SLOT_RTE   (1 << 2)
>> +#define TB_FLAG_PENDING_MOVCA    (1 << 3)
>> +#define TB_FLAG_GUSA_SHIFT       4                      /* [11:4] */
>> +#define TB_FLAG_GUSA_EXCLUSIVE   (1 << 12)
>> +#define TB_FLAG_UNALIGN          (1 << 13)
>> +#define TB_FLAG_SR_FD            (1 << SR_FD)           /* 15 */
>> +#define TB_FLAG_FPSCR_PR         FPSCR_PR               /* 19 */
>> +#define TB_FLAG_FPSCR_SZ         FPSCR_SZ               /* 20 */
>> +#define TB_FLAG_FPSCR_FR         FPSCR_FR               /* 21 */
>> +#define TB_FLAG_SR_RB            (1 << SR_RB)           /* 29 */
>> +#define TB_FLAG_SR_MD            (1 << SR_MD)           /* 30 */
>>   
>> -#define TB_FLAG_PENDING_MOVCA  (1 << 3)
>> -#define TB_FLAG_UNALIGN        (1 << 4)
>> -
>> -#define GUSA_SHIFT             4
>> -#ifdef CONFIG_USER_ONLY
>> -#define GUSA_EXCLUSIVE         (1 << 12)
>> -#define GUSA_MASK              ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
>> -#else
>> -/* Provide dummy versions of the above to allow tests against tbflags
>> -   to be elided while avoiding ifdefs.  */
>> -#define GUSA_EXCLUSIVE         0
>> -#define GUSA_MASK              0
>> -#endif
>> -
>> -#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
>> +#define TB_FLAG_DELAY_SLOT_MASK  (TB_FLAG_DELAY_SLOT |       \
>> +                                  TB_FLAG_DELAY_SLOT_COND |  \
>> +                                  TB_FLAG_DELAY_SLOT_RTE)
>> +#define TB_FLAG_GUSA_MASK        ((0xff << TB_FLAG_GUSA_SHIFT) | \
>> +                                  TB_FLAG_GUSA_EXCLUSIVE)
>> +#define TB_FLAG_FPSCR_MASK       (TB_FLAG_FPSCR_PR | \
>> +                                  TB_FLAG_FPSCR_SZ | \
>> +                                  TB_FLAG_FPSCR_FR)
>> +#define TB_FLAG_SR_MASK          (TB_FLAG_SR_FD | \
>> +                                  TB_FLAG_SR_RB | \
>> +                                  TB_FLAG_SR_MD)
>> +#define TB_FLAG_ENVFLAGS_MASK    (TB_FLAG_DELAY_SLOT_MASK | \
>> +                                  TB_FLAG_GUSA_MASK)
>>   
>>   typedef struct tlb_t {
>>       uint32_t vpn;		/* virtual page number */
>> @@ -258,7 +265,7 @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
>>   {
>>       /* The instruction in a RTE delay slot is fetched in privileged
>>          mode, but executed in user mode.  */
>> -    if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
>> +    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
>>           return 0;
>>       } else {
>>           return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
>> @@ -366,11 +373,10 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
>>   {
>>       *pc = env->pc;
>>       /* For a gUSA region, notice the end of the region.  */
>> -    *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
>> -    *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
>> -            | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
>> -            | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
>> -            | (env->sr & (1u << SR_FD))                        /* Bit 15 */
>> +    *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
>> +    *flags = env->flags
>> +            | (env->fpscr & TB_FLAG_FPSCR_MASK)
>> +            | (env->sr & TB_FLAG_SR_MASK)
>>               | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
>>   #ifdef CONFIG_USER_ONLY
>>       *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
>> diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
>> index f6a18bc6b5..c4ba962708 100644
>> --- a/linux-user/sh4/signal.c
>> +++ b/linux-user/sh4/signal.c
>> @@ -161,7 +161,7 @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
>>       __get_user(regs->fpul, &sc->sc_fpul);
>>   
>>       regs->tra = -1;         /* disable syscall checks */
>> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
>> +    regs->flags = 0;
>>   }
>>   
>>   void setup_frame(int sig, struct target_sigaction *ka,
>> @@ -199,7 +199,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
>>       regs->gregs[5] = 0;
>>       regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
>>       regs->pc = (unsigned long) ka->_sa_handler;
>> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
>> +    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
>>   
>>       unlock_user_struct(frame, frame_addr, 1);
>>       return;
>> @@ -251,7 +251,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
>>       regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
>>       regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
>>       regs->pc = (unsigned long) ka->_sa_handler;
>> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
>> +    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
>>   
>>       unlock_user_struct(frame, frame_addr, 1);
>>       return;
>> diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
>> index 06b2691dc4..65643b6b1c 100644
>> --- a/target/sh4/cpu.c
>> +++ b/target/sh4/cpu.c
>> @@ -40,7 +40,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
>>       SuperHCPU *cpu = SUPERH_CPU(cs);
>>   
>>       cpu->env.pc = tb->pc;
>> -    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
>> +    cpu->env.flags = tb->flags;
>>   }
>>   
>>   #ifndef CONFIG_USER_ONLY
>> @@ -50,10 +50,10 @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
>>       SuperHCPU *cpu = SUPERH_CPU(cs);
>>       CPUSH4State *env = &cpu->env;
>>   
>> -    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
>> +    if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
>>           && env->pc != tb->pc) {
>>           env->pc -= 2;
>> -        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
>> +        env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
>>           return true;
>>       }
>>       return false;
>> diff --git a/target/sh4/helper.c b/target/sh4/helper.c
>> index 6a620e36fc..e02e7af607 100644
>> --- a/target/sh4/helper.c
>> +++ b/target/sh4/helper.c
>> @@ -147,11 +147,11 @@ void superh_cpu_do_interrupt(CPUState *cs)
>>       env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
>>       env->lock_addr = -1;
>>   
>> -    if (env->flags & DELAY_SLOT_MASK) {
>> +    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
>>           /* Branch instruction should be executed again before delay slot. */
>>   	env->spc -= 2;
>>   	/* Clear flags for exception/interrupt routine. */
>> -        env->flags &= ~DELAY_SLOT_MASK;
>> +        env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
>>       }
>>   
>>       if (do_exp) {
>> @@ -786,7 +786,7 @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
>>           CPUSH4State *env = &cpu->env;
>>   
>>           /* Delay slots are indivisible, ignore interrupts */
>> -        if (env->flags & DELAY_SLOT_MASK) {
>> +        if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
>>               return false;
>>           } else {
>>               superh_cpu_do_interrupt(cs);
>> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
>> index f1b190e7cf..68d539c7f6 100644
>> --- a/target/sh4/translate.c
>> +++ b/target/sh4/translate.c
>> @@ -175,13 +175,13 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
>>   		    i, env->gregs[i], i + 1, env->gregs[i + 1],
>>   		    i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
>>       }
>> -    if (env->flags & DELAY_SLOT) {
>> +    if (env->flags & TB_FLAG_DELAY_SLOT) {
>>           qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
>>   		    env->delayed_pc);
>> -    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
>> +    } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
>>           qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
>>   		    env->delayed_pc);
>> -    } else if (env->flags & DELAY_SLOT_RTE) {
>> +    } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
>>           qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
>>                        env->delayed_pc);
>>       }
>> @@ -223,7 +223,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
>>   
>>   static inline bool use_exit_tb(DisasContext *ctx)
>>   {
>> -    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
>> +    return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
>>   }
>>   
>>   static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
>> @@ -276,12 +276,12 @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
>>       TCGLabel *l1 = gen_new_label();
>>       TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
>>   
>> -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
>> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>>           /* When in an exclusive region, we must continue to the end.
>>              Therefore, exit the region on a taken branch, but otherwise
>>              fall through to the next instruction.  */
>>           tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
>> -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
>> +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
>>           /* Note that this won't actually use a goto_tb opcode because we
>>              disallow it in use_goto_tb, but it handles exit + singlestep.  */
>>           gen_goto_tb(ctx, 0, dest);
>> @@ -307,14 +307,14 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
>>       tcg_gen_mov_i32(ds, cpu_delayed_cond);
>>       tcg_gen_discard_i32(cpu_delayed_cond);
>>   
>> -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
>> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>>           /* When in an exclusive region, we must continue to the end.
>>              Therefore, exit the region on a taken branch, but otherwise
>>              fall through to the next instruction.  */
>>           tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
>>   
>>           /* Leave the gUSA region.  */
>> -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
>> +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
>>           gen_jump(ctx);
>>   
>>           gen_set_label(l1);
>> @@ -361,8 +361,8 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
>>   #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
>>   
>>   #define CHECK_NOT_DELAY_SLOT \
>> -    if (ctx->envflags & DELAY_SLOT_MASK) {  \
>> -        goto do_illegal_slot;               \
>> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
>> +        goto do_illegal_slot;                       \
>>       }
>>   
>>   #define CHECK_PRIVILEGED \
>> @@ -436,7 +436,7 @@ static void _decode_opc(DisasContext * ctx)
>>       case 0x000b:		/* rts */
>>   	CHECK_NOT_DELAY_SLOT
>>   	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
>> -        ctx->envflags |= DELAY_SLOT;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>>   	ctx->delayed_pc = (uint32_t) - 1;
>>   	return;
>>       case 0x0028:		/* clrmac */
>> @@ -458,7 +458,7 @@ static void _decode_opc(DisasContext * ctx)
>>   	CHECK_NOT_DELAY_SLOT
>>           gen_write_sr(cpu_ssr);
>>   	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
>> -        ctx->envflags |= DELAY_SLOT_RTE;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
>>   	ctx->delayed_pc = (uint32_t) - 1;
>>           ctx->base.is_jmp = DISAS_STOP;
>>   	return;
>> @@ -513,12 +513,15 @@ static void _decode_opc(DisasContext * ctx)
>>   	return;
>>       case 0xe000:		/* mov #imm,Rn */
>>   #ifdef CONFIG_USER_ONLY
>> -        /* Detect the start of a gUSA region.  If so, update envflags
>> -           and end the TB.  This will allow us to see the end of the
>> -           region (stored in R0) in the next TB.  */
>> +        /*
>> +         * Detect the start of a gUSA region (mov #-n, r15).
>> +         * If so, update envflags and end the TB.  This will allow us
>> +         * to see the end of the region (stored in R0) in the next TB.
>> +         */
>>           if (B11_8 == 15 && B7_0s < 0 &&
>>               (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
>> -            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
>> +            ctx->envflags =
>> +                deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
>>               ctx->base.is_jmp = DISAS_STOP;
>>           }
>>   #endif
>> @@ -544,13 +547,13 @@ static void _decode_opc(DisasContext * ctx)
>>       case 0xa000:		/* bra disp */
>>   	CHECK_NOT_DELAY_SLOT
>>           ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
>> -        ctx->envflags |= DELAY_SLOT;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>>   	return;
>>       case 0xb000:		/* bsr disp */
>>   	CHECK_NOT_DELAY_SLOT
>>           tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
>>           ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
>> -        ctx->envflags |= DELAY_SLOT;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>>   	return;
>>       }
>>   
>> @@ -1194,7 +1197,7 @@ static void _decode_opc(DisasContext * ctx)
>>   	CHECK_NOT_DELAY_SLOT
>>           tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
>>           ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
>> -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
>>   	return;
>>       case 0x8900:		/* bt label */
>>   	CHECK_NOT_DELAY_SLOT
>> @@ -1204,7 +1207,7 @@ static void _decode_opc(DisasContext * ctx)
>>   	CHECK_NOT_DELAY_SLOT
>>           tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
>>           ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
>> -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
>>   	return;
>>       case 0x8800:		/* cmp/eq #imm,R0 */
>>           tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
>> @@ -1388,14 +1391,14 @@ static void _decode_opc(DisasContext * ctx)
>>       case 0x0023:		/* braf Rn */
>>   	CHECK_NOT_DELAY_SLOT
>>           tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
>> -        ctx->envflags |= DELAY_SLOT;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>>   	ctx->delayed_pc = (uint32_t) - 1;
>>   	return;
>>       case 0x0003:		/* bsrf Rn */
>>   	CHECK_NOT_DELAY_SLOT
>>           tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
>>   	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
>> -        ctx->envflags |= DELAY_SLOT;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>>   	ctx->delayed_pc = (uint32_t) - 1;
>>   	return;
>>       case 0x4015:		/* cmp/pl Rn */
>> @@ -1411,14 +1414,14 @@ static void _decode_opc(DisasContext * ctx)
>>       case 0x402b:		/* jmp @Rn */
>>   	CHECK_NOT_DELAY_SLOT
>>   	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
>> -        ctx->envflags |= DELAY_SLOT;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>>   	ctx->delayed_pc = (uint32_t) - 1;
>>   	return;
>>       case 0x400b:		/* jsr @Rn */
>>   	CHECK_NOT_DELAY_SLOT
>>           tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
>>   	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
>> -        ctx->envflags |= DELAY_SLOT;
>> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
>>   	ctx->delayed_pc = (uint32_t) - 1;
>>   	return;
>>       case 0x400e:		/* ldc Rm,SR */
>> @@ -1839,7 +1842,7 @@ static void _decode_opc(DisasContext * ctx)
>>       fflush(stderr);
>>   #endif
>>    do_illegal:
>> -    if (ctx->envflags & DELAY_SLOT_MASK) {
>> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
>>    do_illegal_slot:
>>           gen_save_cpu_state(ctx, true);
>>           gen_helper_raise_slot_illegal_instruction(cpu_env);
>> @@ -1852,7 +1855,7 @@ static void _decode_opc(DisasContext * ctx)
>>   
>>    do_fpu_disabled:
>>       gen_save_cpu_state(ctx, true);
>> -    if (ctx->envflags & DELAY_SLOT_MASK) {
>> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
>>           gen_helper_raise_slot_fpu_disable(cpu_env);
>>       } else {
>>           gen_helper_raise_fpu_disable(cpu_env);
>> @@ -1867,23 +1870,23 @@ static void decode_opc(DisasContext * ctx)
>>   
>>       _decode_opc(ctx);
>>   
>> -    if (old_flags & DELAY_SLOT_MASK) {
>> +    if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
>>           /* go out of the delay slot */
>> -        ctx->envflags &= ~DELAY_SLOT_MASK;
>> +        ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
>>   
>>           /* When in an exclusive region, we must continue to the end
>>              for conditional branches.  */
>> -        if (ctx->tbflags & GUSA_EXCLUSIVE
>> -            && old_flags & DELAY_SLOT_CONDITIONAL) {
>> +        if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
>> +            && old_flags & TB_FLAG_DELAY_SLOT_COND) {
>>               gen_delayed_conditional_jump(ctx);
>>               return;
>>           }
>>           /* Otherwise this is probably an invalid gUSA region.
>>              Drop the GUSA bits so the next TB doesn't see them.  */
>> -        ctx->envflags &= ~GUSA_MASK;
>> +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
>>   
>>           tcg_gen_movi_i32(cpu_flags, ctx->envflags);
>> -        if (old_flags & DELAY_SLOT_CONDITIONAL) {
>> +        if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
>>   	    gen_delayed_conditional_jump(ctx);
>>           } else {
>>               gen_jump(ctx);
>> @@ -2223,7 +2226,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
>>       }
>>   
>>       /* The entire region has been translated.  */
>> -    ctx->envflags &= ~GUSA_MASK;
>> +    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
>>       ctx->base.pc_next = pc_end;
>>       ctx->base.num_insns += max_insns - 1;
>>       return;
>> @@ -2234,7 +2237,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
>>   
>>       /* Restart with the EXCLUSIVE bit set, within a TB run via
>>          cpu_exec_step_atomic holding the exclusive lock.  */
>> -    ctx->envflags |= GUSA_EXCLUSIVE;
>> +    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
>>       gen_save_cpu_state(ctx, false);
>>       gen_helper_exclusive(cpu_env);
>>       ctx->base.is_jmp = DISAS_NORETURN;
>> @@ -2267,17 +2270,19 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>                     (tbflags & (1 << SR_RB))) * 0x10;
>>       ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
>>   
>> -    if (tbflags & GUSA_MASK) {
>> +#ifdef CONFIG_USER_ONLY
>> +    if (tbflags & TB_FLAG_GUSA_MASK) {
>> +        /* In gUSA exclusive region. */
>>           uint32_t pc = ctx->base.pc_next;
>>           uint32_t pc_end = ctx->base.tb->cs_base;
>> -        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
>> +        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
>>           int max_insns = (pc_end - pc) / 2;
>>   
>>           if (pc != pc_end + backup || max_insns < 2) {
>>               /* This is a malformed gUSA region.  Don't do anything special,
>>                  since the interpreter is likely to get confused.  */
>> -            ctx->envflags &= ~GUSA_MASK;
>> -        } else if (tbflags & GUSA_EXCLUSIVE) {
>> +            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
>> +        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>>               /* Regardless of single-stepping or the end of the page,
>>                  we must complete execution of the gUSA region while
>>                  holding the exclusive lock.  */
>> @@ -2285,6 +2290,7 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>               return;
>>           }
>>       }
>> +#endif
>>   
>>       /* Since the ISA is fixed-width, we can bound by the number
>>          of instructions remaining on the page.  */
>> @@ -2309,8 +2315,8 @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
>>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
>>   
>>   #ifdef CONFIG_USER_ONLY
>> -    if (unlikely(ctx->envflags & GUSA_MASK)
>> -        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
>> +    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
>> +        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
>>           /* We're in an gUSA region, and we have not already fallen
>>              back on using an exclusive region.  Attempt to parse the
>>              region into a single supported atomic operation.  Failure
>> @@ -2330,9 +2336,9 @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>>   {
>>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
>>   
>> -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
>> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
>>           /* Ending the region of exclusivity.  Clear the bits.  */
>> -        ctx->envflags &= ~GUSA_MASK;
>> +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
>>       }
>>   
>>       switch (ctx->base.is_jmp) {
>> -- 
>> 2.34.1
>>
> 
> Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Yoshinori Sato 1 year, 7 months ago
On Mon, 03 Oct 2022 02:23:51 +0900,
Richard Henderson wrote:
> 
> Ping, or should I create a PR myself?
> 
> r~

Sorry.
I can't work this week, so please submit a PR.

> 
> On 9/1/22 07:15, Yoshinori Sato wrote:
> > On Thu, 01 Sep 2022 19:15:09 +0900,
> > Richard Henderson wrote:
> >> 
> >> The value previously chosen overlaps GUSA_MASK.
> >> 
> >> Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
> >> that they are included in TB_FLAGs.  Add aliases for the
> >> FPSCR and SR bits that are included in TB_FLAGS, so that
> >> we don't accidentally reassign those bits.
> >> 
> >> Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
> >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> >> ---
> >>   target/sh4/cpu.h        | 56 +++++++++++++------------
> >>   linux-user/sh4/signal.c |  6 +--
> >>   target/sh4/cpu.c        |  6 +--
> >>   target/sh4/helper.c     |  6 +--
> >>   target/sh4/translate.c  | 90 ++++++++++++++++++++++-------------------
> >>   5 files changed, 88 insertions(+), 76 deletions(-)
> >> 
> >> diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
> >> index 9f15ef913c..727b829598 100644
> >> --- a/target/sh4/cpu.h
> >> +++ b/target/sh4/cpu.h
> >> @@ -78,26 +78,33 @@
> >>   #define FPSCR_RM_NEAREST       (0 << 0)
> >>   #define FPSCR_RM_ZERO          (1 << 0)
> >>   -#define DELAY_SLOT_MASK        0x7
> >> -#define DELAY_SLOT             (1 << 0)
> >> -#define DELAY_SLOT_CONDITIONAL (1 << 1)
> >> -#define DELAY_SLOT_RTE         (1 << 2)
> >> +#define TB_FLAG_DELAY_SLOT       (1 << 0)
> >> +#define TB_FLAG_DELAY_SLOT_COND  (1 << 1)
> >> +#define TB_FLAG_DELAY_SLOT_RTE   (1 << 2)
> >> +#define TB_FLAG_PENDING_MOVCA    (1 << 3)
> >> +#define TB_FLAG_GUSA_SHIFT       4                      /* [11:4] */
> >> +#define TB_FLAG_GUSA_EXCLUSIVE   (1 << 12)
> >> +#define TB_FLAG_UNALIGN          (1 << 13)
> >> +#define TB_FLAG_SR_FD            (1 << SR_FD)           /* 15 */
> >> +#define TB_FLAG_FPSCR_PR         FPSCR_PR               /* 19 */
> >> +#define TB_FLAG_FPSCR_SZ         FPSCR_SZ               /* 20 */
> >> +#define TB_FLAG_FPSCR_FR         FPSCR_FR               /* 21 */
> >> +#define TB_FLAG_SR_RB            (1 << SR_RB)           /* 29 */
> >> +#define TB_FLAG_SR_MD            (1 << SR_MD)           /* 30 */
> >>   -#define TB_FLAG_PENDING_MOVCA  (1 << 3)
> >> -#define TB_FLAG_UNALIGN        (1 << 4)
> >> -
> >> -#define GUSA_SHIFT             4
> >> -#ifdef CONFIG_USER_ONLY
> >> -#define GUSA_EXCLUSIVE         (1 << 12)
> >> -#define GUSA_MASK              ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
> >> -#else
> >> -/* Provide dummy versions of the above to allow tests against tbflags
> >> -   to be elided while avoiding ifdefs.  */
> >> -#define GUSA_EXCLUSIVE         0
> >> -#define GUSA_MASK              0
> >> -#endif
> >> -
> >> -#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
> >> +#define TB_FLAG_DELAY_SLOT_MASK  (TB_FLAG_DELAY_SLOT |       \
> >> +                                  TB_FLAG_DELAY_SLOT_COND |  \
> >> +                                  TB_FLAG_DELAY_SLOT_RTE)
> >> +#define TB_FLAG_GUSA_MASK        ((0xff << TB_FLAG_GUSA_SHIFT) | \
> >> +                                  TB_FLAG_GUSA_EXCLUSIVE)
> >> +#define TB_FLAG_FPSCR_MASK       (TB_FLAG_FPSCR_PR | \
> >> +                                  TB_FLAG_FPSCR_SZ | \
> >> +                                  TB_FLAG_FPSCR_FR)
> >> +#define TB_FLAG_SR_MASK          (TB_FLAG_SR_FD | \
> >> +                                  TB_FLAG_SR_RB | \
> >> +                                  TB_FLAG_SR_MD)
> >> +#define TB_FLAG_ENVFLAGS_MASK    (TB_FLAG_DELAY_SLOT_MASK | \
> >> +                                  TB_FLAG_GUSA_MASK)
> >>     typedef struct tlb_t {
> >>       uint32_t vpn;		/* virtual page number */
> >> @@ -258,7 +265,7 @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
> >>   {
> >>       /* The instruction in a RTE delay slot is fetched in privileged
> >>          mode, but executed in user mode.  */
> >> -    if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
> >> +    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
> >>           return 0;
> >>       } else {
> >>           return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
> >> @@ -366,11 +373,10 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
> >>   {
> >>       *pc = env->pc;
> >>       /* For a gUSA region, notice the end of the region.  */
> >> -    *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
> >> -    *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
> >> -            | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
> >> -            | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
> >> -            | (env->sr & (1u << SR_FD))                        /* Bit 15 */
> >> +    *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
> >> +    *flags = env->flags
> >> +            | (env->fpscr & TB_FLAG_FPSCR_MASK)
> >> +            | (env->sr & TB_FLAG_SR_MASK)
> >>               | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
> >>   #ifdef CONFIG_USER_ONLY
> >>       *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
> >> diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
> >> index f6a18bc6b5..c4ba962708 100644
> >> --- a/linux-user/sh4/signal.c
> >> +++ b/linux-user/sh4/signal.c
> >> @@ -161,7 +161,7 @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
> >>       __get_user(regs->fpul, &sc->sc_fpul);
> >>         regs->tra = -1;         /* disable syscall checks */
> >> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
> >> +    regs->flags = 0;
> >>   }
> >>     void setup_frame(int sig, struct target_sigaction *ka,
> >> @@ -199,7 +199,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
> >>       regs->gregs[5] = 0;
> >>       regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
> >>       regs->pc = (unsigned long) ka->_sa_handler;
> >> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
> >> +    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
> >>         unlock_user_struct(frame, frame_addr, 1);
> >>       return;
> >> @@ -251,7 +251,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
> >>       regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
> >>       regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
> >>       regs->pc = (unsigned long) ka->_sa_handler;
> >> -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
> >> +    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
> >>         unlock_user_struct(frame, frame_addr, 1);
> >>       return;
> >> diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
> >> index 06b2691dc4..65643b6b1c 100644
> >> --- a/target/sh4/cpu.c
> >> +++ b/target/sh4/cpu.c
> >> @@ -40,7 +40,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
> >>       SuperHCPU *cpu = SUPERH_CPU(cs);
> >>         cpu->env.pc = tb->pc;
> >> -    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
> >> +    cpu->env.flags = tb->flags;
> >>   }
> >>     #ifndef CONFIG_USER_ONLY
> >> @@ -50,10 +50,10 @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
> >>       SuperHCPU *cpu = SUPERH_CPU(cs);
> >>       CPUSH4State *env = &cpu->env;
> >>   -    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL)))
> >> != 0
> >> +    if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
> >>           && env->pc != tb->pc) {
> >>           env->pc -= 2;
> >> -        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
> >> +        env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
> >>           return true;
> >>       }
> >>       return false;
> >> diff --git a/target/sh4/helper.c b/target/sh4/helper.c
> >> index 6a620e36fc..e02e7af607 100644
> >> --- a/target/sh4/helper.c
> >> +++ b/target/sh4/helper.c
> >> @@ -147,11 +147,11 @@ void superh_cpu_do_interrupt(CPUState *cs)
> >>       env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
> >>       env->lock_addr = -1;
> >>   -    if (env->flags & DELAY_SLOT_MASK) {
> >> +    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
> >>           /* Branch instruction should be executed again before delay slot. */
> >>   	env->spc -= 2;
> >>   	/* Clear flags for exception/interrupt routine. */
> >> -        env->flags &= ~DELAY_SLOT_MASK;
> >> +        env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
> >>       }
> >>         if (do_exp) {
> >> @@ -786,7 +786,7 @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
> >>           CPUSH4State *env = &cpu->env;
> >>             /* Delay slots are indivisible, ignore interrupts */
> >> -        if (env->flags & DELAY_SLOT_MASK) {
> >> +        if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
> >>               return false;
> >>           } else {
> >>               superh_cpu_do_interrupt(cs);
> >> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> >> index f1b190e7cf..68d539c7f6 100644
> >> --- a/target/sh4/translate.c
> >> +++ b/target/sh4/translate.c
> >> @@ -175,13 +175,13 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
> >>   		    i, env->gregs[i], i + 1, env->gregs[i + 1],
> >>   		    i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
> >>       }
> >> -    if (env->flags & DELAY_SLOT) {
> >> +    if (env->flags & TB_FLAG_DELAY_SLOT) {
> >>           qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
> >>   		    env->delayed_pc);
> >> -    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
> >> +    } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
> >>           qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
> >>   		    env->delayed_pc);
> >> -    } else if (env->flags & DELAY_SLOT_RTE) {
> >> +    } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
> >>           qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
> >>                        env->delayed_pc);
> >>       }
> >> @@ -223,7 +223,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
> >>     static inline bool use_exit_tb(DisasContext *ctx)
> >>   {
> >> -    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
> >> +    return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
> >>   }
> >>     static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
> >> @@ -276,12 +276,12 @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
> >>       TCGLabel *l1 = gen_new_label();
> >>       TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
> >>   -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
> >> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
> >>           /* When in an exclusive region, we must continue to the end.
> >>              Therefore, exit the region on a taken branch, but otherwise
> >>              fall through to the next instruction.  */
> >>           tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
> >> -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
> >> +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
> >>           /* Note that this won't actually use a goto_tb opcode because we
> >>              disallow it in use_goto_tb, but it handles exit + singlestep.  */
> >>           gen_goto_tb(ctx, 0, dest);
> >> @@ -307,14 +307,14 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
> >>       tcg_gen_mov_i32(ds, cpu_delayed_cond);
> >>       tcg_gen_discard_i32(cpu_delayed_cond);
> >>   -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
> >> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
> >>           /* When in an exclusive region, we must continue to the end.
> >>              Therefore, exit the region on a taken branch, but otherwise
> >>              fall through to the next instruction.  */
> >>           tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
> >>             /* Leave the gUSA region.  */
> >> -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
> >> +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
> >>           gen_jump(ctx);
> >>             gen_set_label(l1);
> >> @@ -361,8 +361,8 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
> >>   #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
> >>     #define CHECK_NOT_DELAY_SLOT \
> >> -    if (ctx->envflags & DELAY_SLOT_MASK) {  \
> >> -        goto do_illegal_slot;               \
> >> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
> >> +        goto do_illegal_slot;                       \
> >>       }
> >>     #define CHECK_PRIVILEGED \
> >> @@ -436,7 +436,7 @@ static void _decode_opc(DisasContext * ctx)
> >>       case 0x000b:		/* rts */
> >>   	CHECK_NOT_DELAY_SLOT
> >>   	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
> >> -        ctx->envflags |= DELAY_SLOT;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
> >>   	ctx->delayed_pc = (uint32_t) - 1;
> >>   	return;
> >>       case 0x0028:		/* clrmac */
> >> @@ -458,7 +458,7 @@ static void _decode_opc(DisasContext * ctx)
> >>   	CHECK_NOT_DELAY_SLOT
> >>           gen_write_sr(cpu_ssr);
> >>   	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
> >> -        ctx->envflags |= DELAY_SLOT_RTE;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
> >>   	ctx->delayed_pc = (uint32_t) - 1;
> >>           ctx->base.is_jmp = DISAS_STOP;
> >>   	return;
> >> @@ -513,12 +513,15 @@ static void _decode_opc(DisasContext * ctx)
> >>   	return;
> >>       case 0xe000:		/* mov #imm,Rn */
> >>   #ifdef CONFIG_USER_ONLY
> >> -        /* Detect the start of a gUSA region.  If so, update envflags
> >> -           and end the TB.  This will allow us to see the end of the
> >> -           region (stored in R0) in the next TB.  */
> >> +        /*
> >> +         * Detect the start of a gUSA region (mov #-n, r15).
> >> +         * If so, update envflags and end the TB.  This will allow us
> >> +         * to see the end of the region (stored in R0) in the next TB.
> >> +         */
> >>           if (B11_8 == 15 && B7_0s < 0 &&
> >>               (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
> >> -            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
> >> +            ctx->envflags =
> >> +                deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
> >>               ctx->base.is_jmp = DISAS_STOP;
> >>           }
> >>   #endif
> >> @@ -544,13 +547,13 @@ static void _decode_opc(DisasContext * ctx)
> >>       case 0xa000:		/* bra disp */
> >>   	CHECK_NOT_DELAY_SLOT
> >>           ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
> >> -        ctx->envflags |= DELAY_SLOT;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
> >>   	return;
> >>       case 0xb000:		/* bsr disp */
> >>   	CHECK_NOT_DELAY_SLOT
> >>           tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
> >>           ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
> >> -        ctx->envflags |= DELAY_SLOT;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
> >>   	return;
> >>       }
> >>   @@ -1194,7 +1197,7 @@ static void _decode_opc(DisasContext * ctx)
> >>   	CHECK_NOT_DELAY_SLOT
> >>           tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
> >>           ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
> >> -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
> >>   	return;
> >>       case 0x8900:		/* bt label */
> >>   	CHECK_NOT_DELAY_SLOT
> >> @@ -1204,7 +1207,7 @@ static void _decode_opc(DisasContext * ctx)
> >>   	CHECK_NOT_DELAY_SLOT
> >>           tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
> >>           ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
> >> -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
> >>   	return;
> >>       case 0x8800:		/* cmp/eq #imm,R0 */
> >>           tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
> >> @@ -1388,14 +1391,14 @@ static void _decode_opc(DisasContext * ctx)
> >>       case 0x0023:		/* braf Rn */
> >>   	CHECK_NOT_DELAY_SLOT
> >>           tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
> >> -        ctx->envflags |= DELAY_SLOT;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
> >>   	ctx->delayed_pc = (uint32_t) - 1;
> >>   	return;
> >>       case 0x0003:		/* bsrf Rn */
> >>   	CHECK_NOT_DELAY_SLOT
> >>           tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
> >>   	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
> >> -        ctx->envflags |= DELAY_SLOT;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
> >>   	ctx->delayed_pc = (uint32_t) - 1;
> >>   	return;
> >>       case 0x4015:		/* cmp/pl Rn */
> >> @@ -1411,14 +1414,14 @@ static void _decode_opc(DisasContext * ctx)
> >>       case 0x402b:		/* jmp @Rn */
> >>   	CHECK_NOT_DELAY_SLOT
> >>   	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
> >> -        ctx->envflags |= DELAY_SLOT;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
> >>   	ctx->delayed_pc = (uint32_t) - 1;
> >>   	return;
> >>       case 0x400b:		/* jsr @Rn */
> >>   	CHECK_NOT_DELAY_SLOT
> >>           tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
> >>   	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
> >> -        ctx->envflags |= DELAY_SLOT;
> >> +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
> >>   	ctx->delayed_pc = (uint32_t) - 1;
> >>   	return;
> >>       case 0x400e:		/* ldc Rm,SR */
> >> @@ -1839,7 +1842,7 @@ static void _decode_opc(DisasContext * ctx)
> >>       fflush(stderr);
> >>   #endif
> >>    do_illegal:
> >> -    if (ctx->envflags & DELAY_SLOT_MASK) {
> >> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
> >>    do_illegal_slot:
> >>           gen_save_cpu_state(ctx, true);
> >>           gen_helper_raise_slot_illegal_instruction(cpu_env);
> >> @@ -1852,7 +1855,7 @@ static void _decode_opc(DisasContext * ctx)
> >>      do_fpu_disabled:
> >>       gen_save_cpu_state(ctx, true);
> >> -    if (ctx->envflags & DELAY_SLOT_MASK) {
> >> +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
> >>           gen_helper_raise_slot_fpu_disable(cpu_env);
> >>       } else {
> >>           gen_helper_raise_fpu_disable(cpu_env);
> >> @@ -1867,23 +1870,23 @@ static void decode_opc(DisasContext * ctx)
> >>         _decode_opc(ctx);
> >>   -    if (old_flags & DELAY_SLOT_MASK) {
> >> +    if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
> >>           /* go out of the delay slot */
> >> -        ctx->envflags &= ~DELAY_SLOT_MASK;
> >> +        ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
> >>             /* When in an exclusive region, we must continue to the
> >> end
> >>              for conditional branches.  */
> >> -        if (ctx->tbflags & GUSA_EXCLUSIVE
> >> -            && old_flags & DELAY_SLOT_CONDITIONAL) {
> >> +        if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
> >> +            && old_flags & TB_FLAG_DELAY_SLOT_COND) {
> >>               gen_delayed_conditional_jump(ctx);
> >>               return;
> >>           }
> >>           /* Otherwise this is probably an invalid gUSA region.
> >>              Drop the GUSA bits so the next TB doesn't see them.  */
> >> -        ctx->envflags &= ~GUSA_MASK;
> >> +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
> >>             tcg_gen_movi_i32(cpu_flags, ctx->envflags);
> >> -        if (old_flags & DELAY_SLOT_CONDITIONAL) {
> >> +        if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
> >>   	    gen_delayed_conditional_jump(ctx);
> >>           } else {
> >>               gen_jump(ctx);
> >> @@ -2223,7 +2226,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
> >>       }
> >>         /* The entire region has been translated.  */
> >> -    ctx->envflags &= ~GUSA_MASK;
> >> +    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
> >>       ctx->base.pc_next = pc_end;
> >>       ctx->base.num_insns += max_insns - 1;
> >>       return;
> >> @@ -2234,7 +2237,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
> >>         /* Restart with the EXCLUSIVE bit set, within a TB run via
> >>          cpu_exec_step_atomic holding the exclusive lock.  */
> >> -    ctx->envflags |= GUSA_EXCLUSIVE;
> >> +    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
> >>       gen_save_cpu_state(ctx, false);
> >>       gen_helper_exclusive(cpu_env);
> >>       ctx->base.is_jmp = DISAS_NORETURN;
> >> @@ -2267,17 +2270,19 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
> >>                     (tbflags & (1 << SR_RB))) * 0x10;
> >>       ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
> >>   -    if (tbflags & GUSA_MASK) {
> >> +#ifdef CONFIG_USER_ONLY
> >> +    if (tbflags & TB_FLAG_GUSA_MASK) {
> >> +        /* In gUSA exclusive region. */
> >>           uint32_t pc = ctx->base.pc_next;
> >>           uint32_t pc_end = ctx->base.tb->cs_base;
> >> -        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
> >> +        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
> >>           int max_insns = (pc_end - pc) / 2;
> >>             if (pc != pc_end + backup || max_insns < 2) {
> >>               /* This is a malformed gUSA region.  Don't do anything special,
> >>                  since the interpreter is likely to get confused.  */
> >> -            ctx->envflags &= ~GUSA_MASK;
> >> -        } else if (tbflags & GUSA_EXCLUSIVE) {
> >> +            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
> >> +        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
> >>               /* Regardless of single-stepping or the end of the page,
> >>                  we must complete execution of the gUSA region while
> >>                  holding the exclusive lock.  */
> >> @@ -2285,6 +2290,7 @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
> >>               return;
> >>           }
> >>       }
> >> +#endif
> >>         /* Since the ISA is fixed-width, we can bound by the number
> >>          of instructions remaining on the page.  */
> >> @@ -2309,8 +2315,8 @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
> >>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
> >>     #ifdef CONFIG_USER_ONLY
> >> -    if (unlikely(ctx->envflags & GUSA_MASK)
> >> -        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
> >> +    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
> >> +        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
> >>           /* We're in an gUSA region, and we have not already fallen
> >>              back on using an exclusive region.  Attempt to parse the
> >>              region into a single supported atomic operation.  Failure
> >> @@ -2330,9 +2336,9 @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
> >>   {
> >>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
> >>   -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
> >> +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
> >>           /* Ending the region of exclusivity.  Clear the bits.  */
> >> -        ctx->envflags &= ~GUSA_MASK;
> >> +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
> >>       }
> >>         switch (ctx->base.is_jmp) {
> >> -- 
> >> 2.34.1
> >> 
> > 
> > Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
> 

-- 
Yosinori Sato
Re: [PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN
Posted by Richard Henderson 1 year, 7 months ago
On 10/3/22 22:56, Yoshinori Sato wrote:
> On Mon, 03 Oct 2022 02:23:51 +0900,
> Richard Henderson wrote:
>>
>> Ping, or should I create a PR myself?
>>
>> r~
> 
> Sorry.
> I can't work this week, so please submit a PR.

Ok, I will fold this into the tcg-next PR that I am preparing now.


r~