target/s390x/cpu.h | 32 ++++++++++++++++++++++++++++++++ target/s390x/excp_helper.c | 14 ++++++++++++++ target/s390x/helper.h | 1 + target/s390x/insn-data.def | 3 +++ target/s390x/translate.c | 29 +++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+)
Recent upstream Linux uses the MONITOR CALL instruction for things like
BUG_ON() and WARN_ON(). We currently inject an operation exception when
we hit a MONITOR CALL instruction - which is wrong, as the instruction
is not glued to specific CPU features.
Doing a simple WARN_ON_ONCE() currently results in a panic:
[ 18.162801] illegal operation: 0001 ilc:2 [#1] SMP
[ 18.162889] Modules linked in:
[...]
[ 18.165476] Kernel panic - not syncing: Fatal exception: panic_on_oops
With a proper implementation, we now get:
[ 18.242754] ------------[ cut here ]------------
[ 18.242855] WARNING: CPU: 7 PID: 1 at init/main.c:1534 [...]
[ 18.242919] Modules linked in:
[...]
[ 18.246262] ---[ end trace a420477d71dc97b4 ]---
[ 18.259014] Freeing unused kernel memory: 4220K
To be able to translate it to a NOP easily, mangle the 16 monitor masks
bits from the cr8 into the TB flags.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
target/s390x/cpu.h | 32 ++++++++++++++++++++++++++++++++
target/s390x/excp_helper.c | 14 ++++++++++++++
target/s390x/helper.h | 1 +
target/s390x/insn-data.def | 3 +++
target/s390x/translate.c | 29 +++++++++++++++++++++++++++++
5 files changed, 79 insertions(+)
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 035427521c..a1871bfa24 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -266,6 +266,7 @@ extern const VMStateDescription vmstate_s390_cpu;
#define PSW_SHIFT_KEY 52
#define PSW_MASK_SHORTPSW 0x0008000000000000ULL
#define PSW_MASK_MCHECK 0x0004000000000000ULL
+#define PSW_SHIFT_MCHECK 50
#define PSW_MASK_WAIT 0x0002000000000000ULL
#define PSW_MASK_PSTATE 0x0001000000000000ULL
#define PSW_MASK_ASC 0x0000C00000000000ULL
@@ -274,6 +275,11 @@ extern const VMStateDescription vmstate_s390_cpu;
#define PSW_MASK_PM 0x00000F0000000000ULL
#define PSW_SHIFT_MASK_PM 40
#define PSW_MASK_RI 0x0000008000000000ULL
+#define PSW_MASK_UNUSED_25 0x0000004000000000ULL
+#define PSW_MASK_UNUSED_26 0x0000002000000000ULL
+#define PSW_MASK_UNUSED_27 0x0000001000000000ULL
+#define PSW_MASK_UNUSED_27 0x0000001000000000ULL
+#define PSW_SHIFT_UNUSED_27 36
#define PSW_MASK_64 0x0000000100000000ULL
#define PSW_MASK_32 0x0000000080000000ULL
#define PSW_MASK_SHORT_ADDR 0x000000007fffffffULL
@@ -311,6 +317,19 @@ extern const VMStateDescription vmstate_s390_cpu;
#define FLAG_MASK_AFP (PSW_MASK_UNUSED_2 >> FLAG_MASK_PSW_SHIFT)
#define FLAG_MASK_VECTOR (PSW_MASK_UNUSED_3 >> FLAG_MASK_PSW_SHIFT)
+/*
+ * We'll store the monitor mask bits in a mixture of unused PSW positions
+ * and used PSW positions that are not copied to tb flags (see FLAG_MASK_PSW).
+ */
+#define FLAG_MASK_MM0_7 ((PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_KEY | \
+ PSW_MASK_SHORTPSW | PSW_MASK_MCHECK) >> \
+ FLAG_MASK_PSW_SHIFT)
+#define FLAG_SHIFT_MM0_7 (PSW_SHIFT_MCHECK - FLAG_MASK_PSW_SHIFT)
+#define FLAG_MASK_MM8_15 ((PSW_MASK_PM | PSW_MASK_RI | \
+ PSW_MASK_UNUSED_25 | PSW_MASK_UNUSED_26 | \
+ PSW_MASK_UNUSED_27) >> FLAG_MASK_PSW_SHIFT)
+#define FLAG_SHIFT_MM8_15 (PSW_SHIFT_UNUSED_27 - FLAG_MASK_PSW_SHIFT)
+
/* Control register 0 bits */
#define CR0_LOWPROT 0x0000000010000000ULL
#define CR0_SECONDARY 0x0000000004000000ULL
@@ -324,6 +343,9 @@ extern const VMStateDescription vmstate_s390_cpu;
#define CR0_CPU_TIMER_SC 0x0000000000000400ULL
#define CR0_SERVICE_SC 0x0000000000000200ULL
+/* Control register 8 bits */
+#define CR8_MONITOR_MASK 0x000000000000ffffULL
+
/* Control register 14 bits */
#define CR14_CHANNEL_REPORT_SC 0x0000000010000000ULL
@@ -367,6 +389,8 @@ static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch)
static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc,
target_ulong *cs_base, uint32_t *flags)
{
+ uint8_t byte;
+
*pc = env->psw.addr;
*cs_base = env->ex_value;
*flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW;
@@ -376,6 +400,14 @@ static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc,
if (env->cregs[0] & CR0_VECTOR) {
*flags |= FLAG_MASK_VECTOR;
}
+ /* Copy over the monitor mask bits (16) as two separate bytes. */
+ byte = (env->cregs[8] & CR8_MONITOR_MASK) >> 8;
+ *flags |= (uint32_t)byte << FLAG_SHIFT_MM0_7;
+ byte = env->cregs[8] & CR8_MONITOR_MASK;
+ *flags |= (uint32_t)byte << FLAG_SHIFT_MM8_15;
+
+ QEMU_BUILD_BUG_ON((FLAG_MASK_AFP | FLAG_MASK_VECTOR | FLAG_MASK_MM0_7 |
+ FLAG_MASK_MM8_15) & FLAG_MASK_PSW);
}
/* PER bits from control register 9 */
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index 3b58d10df3..ba6dc53074 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -610,4 +610,18 @@ void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr);
}
+void HELPER(monitor_event)(CPUS390XState *env, uint64_t monitor_code,
+ uint32_t monitor_class)
+{
+ g_assert(monitor_class <= 0xff);
+
+ /* Store the Monitor Class Number and the Monitor Code into the lowcore */
+ stw_phys(env_cpu(env)->as,
+ env->psa + offsetof(LowCore, mon_class_num), monitor_class);
+ stq_phys(env_cpu(env)->as,
+ env->psa + offsetof(LowCore, monitor_code), monitor_code);
+
+ tcg_s390_program_interrupt(env, PGM_MONITOR, GETPC());
+}
+
#endif /* CONFIG_USER_ONLY */
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index b7887b552b..e72b32e0ca 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -295,6 +295,7 @@ DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
DEF_HELPER_3(servc, i32, env, i64, i64)
DEF_HELPER_4(diag, void, env, i32, i32, i32)
DEF_HELPER_3(load_psw, noreturn, env, i64, i64)
+DEF_HELPER_3(monitor_event, noreturn, env, i64, i32)
DEF_HELPER_FLAGS_2(spx, TCG_CALL_NO_RWG, void, env, i64)
DEF_HELPER_FLAGS_2(sck, TCG_CALL_NO_RWG, i32, env, i64)
DEF_HELPER_FLAGS_2(sckc, TCG_CALL_NO_RWG, void, env, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d79ae9e3f1..e14cbd63fa 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -617,6 +617,9 @@
C(0x9a00, LAM, RS_a, Z, 0, a2, 0, 0, lam, 0)
C(0xeb9a, LAMY, RSY_a, LD, 0, a2, 0, 0, lam, 0)
+/* MONITOR CALL */
+ C(0xaf00, MC, SI, Z, la1, 0, 0, 0, mc, 0)
+
/* MOVE */
C(0xd200, MVC, SS_a, Z, la1, a2, 0, 0, mvc, 0)
C(0xe544, MVHHI, SIL, GIE, la1, i2, 0, m1_16, mov2, 0)
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index a777343821..3f2bf6576a 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3302,6 +3302,35 @@ static DisasJumpType op_lcbb(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
+static DisasJumpType op_mc(DisasContext *s, DisasOps *o)
+{
+#if !defined(CONFIG_USER_ONLY)
+ /* Reconstruct the monitor mask from the tb flags. */
+ uint16_t monitor_mask = ((s->base.tb->flags & FLAG_MASK_MM0_7) >>
+ (FLAG_SHIFT_MM0_7 - 8)) |
+ ((s->base.tb->flags & FLAG_MASK_MM8_15) >>
+ FLAG_SHIFT_MM8_15);
+ TCGv_i32 i2;
+#endif
+ const uint16_t monitor_class = get_field(s, i2);
+
+ if (monitor_class & 0xff00) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+#if !defined(CONFIG_USER_ONLY)
+ if (monitor_mask & (0x8000 >> monitor_class)) {
+ i2 = tcg_const_i32(get_field(s, i2));
+ gen_helper_monitor_event(cpu_env, o->addr1, i2);
+ tcg_temp_free_i32(i2);
+ return DISAS_NORETURN;
+ }
+#endif
+ /* Defaults to a NOP. */
+ return DISAS_NEXT;
+}
+
static DisasJumpType op_mov2(DisasContext *s, DisasOps *o)
{
o->out = o->in2;
--
2.26.2
On 9/17/20 4:54 AM, David Hildenbrand wrote: > Recent upstream Linux uses the MONITOR CALL instruction for things like > BUG_ON() and WARN_ON(). We currently inject an operation exception when > we hit a MONITOR CALL instruction - which is wrong, as the instruction > is not glued to specific CPU features. > > Doing a simple WARN_ON_ONCE() currently results in a panic: > [ 18.162801] illegal operation: 0001 ilc:2 [#1] SMP > [ 18.162889] Modules linked in: > [...] > [ 18.165476] Kernel panic - not syncing: Fatal exception: panic_on_oops > > With a proper implementation, we now get: > [ 18.242754] ------------[ cut here ]------------ > [ 18.242855] WARNING: CPU: 7 PID: 1 at init/main.c:1534 [...] > [ 18.242919] Modules linked in: > [...] > [ 18.246262] ---[ end trace a420477d71dc97b4 ]--- > [ 18.259014] Freeing unused kernel memory: 4220K > > To be able to translate it to a NOP easily, mangle the 16 monitor masks > bits from the cr8 into the TB flags. This is a rare situation that does not warrant the use of TB flags. Better to unconditionally call helper_monitor_event, and have the helper function test the runtime value of cr8. If the event is disabled, the helper simply returns. It should be simpler to write, as well, not having to do this: > + /* Copy over the monitor mask bits (16) as two separate bytes. */ > + byte = (env->cregs[8] & CR8_MONITOR_MASK) >> 8; > + *flags |= (uint32_t)byte << FLAG_SHIFT_MM0_7; > + byte = env->cregs[8] & CR8_MONITOR_MASK; > + *flags |= (uint32_t)byte << FLAG_SHIFT_MM8_15; > + > + QEMU_BUILD_BUG_ON((FLAG_MASK_AFP | FLAG_MASK_VECTOR | FLAG_MASK_MM0_7 | > + FLAG_MASK_MM8_15) & FLAG_MASK_PSW); r~
On 17.09.20 17:33, Richard Henderson wrote: > On 9/17/20 4:54 AM, David Hildenbrand wrote: >> Recent upstream Linux uses the MONITOR CALL instruction for things like >> BUG_ON() and WARN_ON(). We currently inject an operation exception when >> we hit a MONITOR CALL instruction - which is wrong, as the instruction >> is not glued to specific CPU features. >> >> Doing a simple WARN_ON_ONCE() currently results in a panic: >> [ 18.162801] illegal operation: 0001 ilc:2 [#1] SMP >> [ 18.162889] Modules linked in: >> [...] >> [ 18.165476] Kernel panic - not syncing: Fatal exception: panic_on_oops >> >> With a proper implementation, we now get: >> [ 18.242754] ------------[ cut here ]------------ >> [ 18.242855] WARNING: CPU: 7 PID: 1 at init/main.c:1534 [...] >> [ 18.242919] Modules linked in: >> [...] >> [ 18.246262] ---[ end trace a420477d71dc97b4 ]--- >> [ 18.259014] Freeing unused kernel memory: 4220K >> >> To be able to translate it to a NOP easily, mangle the 16 monitor masks >> bits from the cr8 into the TB flags. > > This is a rare situation that does not warrant the use of TB flags. Better to I think it can be used for tracing and such, so quite excessively - but yeah, at least in Linux it should be rare. > unconditionally call helper_monitor_event, and have the helper function test > the runtime value of cr8. If the event is disabled, the helper simply returns. > > It should be simpler to write, as well, not having to do this: > Was too appealing to optimize. Well, doing it in a handler also makes the implementation of monitor-event counting easier. -- Thanks, David / dhildenb
© 2016 - 2024 Red Hat, Inc.