:p
atchew
Login
Slightly RFC. This is the next chunk of debug fixes from the bug that Jinoh reported. I've decided to tackle PV guests alone to simplify the problem (No introspection, get some of the core changes in place). Patch 5 is still a bit chunky to follow, but I can't see any way to simplify it without transiently breaking something. Patchs 1 and 3 are entirely new, relative to previous postings of this work. Others are rebased/shuffled. There are still bugs/misfeatures: 1) Data breakpoints during emulation (copy to/from guest) are accounted against Xen and not given back to the guest. 2) Instruction breakpoints aren't calculated for FEP; CPUID. This may not matter, but like everything in PV, it's undocumented and unclear if it's intended behaviour or not. that can be left to some other future to fix. Andrew Cooper (5): x86/pv: Fix the determiniation of whether to inject #DB x86: Introduce x86_merge_dr6() x86/emul: Add a pending_dbg field to x86_emulate_ctxt.retire x86/pv: Drop priv_op_ctxt.bpmatch and use pending_dbg instead x86/pv: Rewrite %dr6 handling xen/arch/x86/debug.c | 20 +++++++++++++++++ xen/arch/x86/include/asm/debugreg.h | 7 ++++++ xen/arch/x86/include/asm/domain.h | 12 ++++++++++ xen/arch/x86/include/asm/x86-defns.h | 7 ++++++ xen/arch/x86/pv/emul-priv-op.c | 31 +++++++++++++------------- xen/arch/x86/pv/emulate.c | 6 ++--- xen/arch/x86/pv/ro-page-fault.c | 4 ++-- xen/arch/x86/pv/traps.c | 17 ++++++++++---- xen/arch/x86/traps.c | 12 +++++----- xen/arch/x86/x86_emulate/x86_emulate.c | 6 ++++- xen/arch/x86/x86_emulate/x86_emulate.h | 22 ++++++++++++++---- 11 files changed, 109 insertions(+), 35 deletions(-) -- 2.30.2
We long ago fixed the emulator to not inject exceptions behind our back. Therefore, assert that that a PV event (including interrupts, because that would be buggy too) isn't pending, rather than skipping the #DB injection if one is. On the other hand, the io_emul() stubs which use X86EMUL_DONE rather than X86EMUL_OKAY may have pending breakpoints to inject after the IO access is complete, not to mention a pending singlestep. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> --- xen/arch/x86/pv/emul-priv-op.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -XXX,XX +XXX,XX @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) switch ( rc ) { case X86EMUL_OKAY: + case X86EMUL_DONE: + ASSERT(!curr->arch.pv.trap_bounce.flags); + if ( ctxt.ctxt.retire.singlestep ) ctxt.bpmatch |= DR_STEP; + if ( ctxt.bpmatch ) { curr->arch.dr6 |= ctxt.bpmatch | DR_STATUS_RESERVED_ONE; - if ( !(curr->arch.pv.trap_bounce.flags & TBF_EXCEPTION) ) - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); + pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); } + /* fall through */ case X86EMUL_RETRY: return EXCRET_fault_fixed; -- 2.30.2
The current logic used to update %dr6 when injecting #DB is buggy. The architectural behaviour is to overwrite B{0..3} and accumulate all other bits. Introduce x86_merge_dr6() to perform the operaton properly. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> --- xen/arch/x86/debug.c | 20 ++++++++++++++++++++ xen/arch/x86/include/asm/debugreg.h | 7 +++++++ xen/arch/x86/include/asm/x86-defns.h | 7 +++++++ 3 files changed, 34 insertions(+) diff --git a/xen/arch/x86/debug.c b/xen/arch/x86/debug.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/debug.c +++ b/xen/arch/x86/debug.c @@ -XXX,XX +XXX,XX @@ * Copyright (C) 2023 XenServer. */ #include <xen/kernel.h> +#include <xen/lib.h> #include <xen/lib/x86/cpu-policy.h> @@ -XXX,XX +XXX,XX @@ unsigned int x86_adj_dr6_rsvd(const struct cpu_policy *p, unsigned int dr6) return dr6; } +unsigned int x86_merge_dr6(const struct cpu_policy *p, unsigned int dr6, + unsigned int new) +{ + /* Flip dr6 to have positive polarity. */ + dr6 ^= X86_DR6_DEFAULT; + + /* Sanity check that only known values are passed in. */ + ASSERT(!(dr6 & ~X86_DR6_KNOWN_MASK)); + ASSERT(!(new & ~X86_DR6_KNOWN_MASK)); + + /* Breakpoint matches are overridden. All other bits accumulate. */ + dr6 = (dr6 & ~X86_DR6_BP_MASK) | new; + + /* Flip dr6 back to having default polarity. */ + dr6 ^= X86_DR6_DEFAULT; + + return x86_adj_dr6_rsvd(p, dr6); +} + unsigned int x86_adj_dr7_rsvd(const struct cpu_policy *p, unsigned int dr7) { unsigned int zeros = X86_DR7_ZEROS; diff --git a/xen/arch/x86/include/asm/debugreg.h b/xen/arch/x86/include/asm/debugreg.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/include/asm/debugreg.h +++ b/xen/arch/x86/include/asm/debugreg.h @@ -XXX,XX +XXX,XX @@ struct cpu_policy; unsigned int x86_adj_dr6_rsvd(const struct cpu_policy *p, unsigned int dr6); unsigned int x86_adj_dr7_rsvd(const struct cpu_policy *p, unsigned int dr7); +/* + * Merge new bits into dr6. 'new' is always given in positive polarity, + * matching the Intel VMCS PENDING_DBG semantics. + */ +unsigned int x86_merge_dr6(const struct cpu_policy *p, unsigned int dr6, + unsigned int new); + #endif /* _X86_DEBUGREG_H */ diff --git a/xen/arch/x86/include/asm/x86-defns.h b/xen/arch/x86/include/asm/x86-defns.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/include/asm/x86-defns.h +++ b/xen/arch/x86/include/asm/x86-defns.h @@ -XXX,XX +XXX,XX @@ #define X86_DR6_BT (_AC(1, UL) << 15) /* Task switch */ #define X86_DR6_RTM (_AC(1, UL) << 16) /* #DB/#BP in RTM region (INV) */ +#define X86_DR6_BP_MASK \ + (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3) + +#define X86_DR6_KNOWN_MASK \ + (X86_DR6_BP_MASK | X86_DR6_BLD | X86_DR6_BD | X86_DR6_BS | \ + X86_DR6_BT | X86_DR6_RTM) + #define X86_DR6_ZEROS _AC(0x00001000, UL) /* %dr6 bits forced to 0 */ #define X86_DR6_DEFAULT _AC(0xffff0ff0, UL) /* Default %dr6 value */ -- 2.30.2
Lots of this is very very broken, but we need to start somewhere. PENDING_DBG, INTERRUPTIBILITY and ACTIVITY are internal pipeline registers which Intel exposed to software in the VMCS, and AMD exposed a subset of in the VMCB. Importantly, bits set in PENDING_DBG can survive across multiple instruction boundaries if e.g. delivery of #DB is delayed by a MovSS. For now, introduce a full pending_dbg field into the retire union. This keeps the sh_page_fault() and init_context() paths working but in due course the field will want to lose the "retire" infix. In addition, set singlestep into pending_dbg as appropriate. Leave the old singlestep bitfield in place until we can adjust the callers to handle it properly. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> --- xen/arch/x86/x86_emulate/x86_emulate.c | 6 +++++- xen/arch/x86/x86_emulate/x86_emulate.h | 17 ++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -XXX,XX +XXX,XX @@ x86_emulate( if ( !mode_64bit() ) _regs.r(ip) = (uint32_t)_regs.r(ip); - /* Should a singlestep #DB be raised? */ + if ( singlestep ) + ctxt->retire.pending_dbg |= X86_DR6_BS; + + /* Should a singlestep #DB be raised? (BROKEN - TODO, merge into pending_dbg) */ if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss ) { ctxt->retire.singlestep = true; @@ -XXX,XX +XXX,XX @@ int x86_emulate_wrapper( { typeof(ctxt->retire) retire = ctxt->retire; + retire.pending_dbg = 0; retire.unblock_nmi = false; ASSERT(!retire.raw); } diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -XXX,XX +XXX,XX @@ struct x86_emulate_ctxt /* Canonical opcode (see below) (valid only on X86EMUL_OKAY). */ unsigned int opcode; - /* Retirement state, set by the emulator (valid only on X86EMUL_OKAY). */ + /* + * Retirement state, set by the emulator (valid only on X86EMUL_OKAY/DONE). + * + * TODO: all this state should be input/output from the VMCS PENDING_DBG, + * INTERRUPTIBILITY and ACTIVITIY fields. + */ union { - uint8_t raw; + unsigned long raw; struct { + /* + * Accumulated %dr6 trap bits, positive polarity. Should only be + * interpreted in the case of X86EMUL_OKAY/DONE. + */ + unsigned int pending_dbg; + bool hlt:1; /* Instruction HLTed. */ bool mov_ss:1; /* Instruction sets MOV-SS irq shadow. */ bool sti:1; /* Instruction sets STI irq shadow. */ bool unblock_nmi:1; /* Instruction clears NMI blocking. */ - bool singlestep:1; /* Singlestepping was active. */ + bool singlestep:1; /* Singlestepping was active. (TODO, merge into pending_dbg) */ }; } retire; -- 2.30.2
With a full pending_dbg field in x86_emulate_ctxt, use it rather than using a local bpmatch field. This simplifies the OKAY/DONE path as singlestep is already accumulated by x86_emulate() when appropriate. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> --- xen/arch/x86/pv/emul-priv-op.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -XXX,XX +XXX,XX @@ struct priv_op_ctxt { unsigned long base, limit; } cs; char *io_emul_stub; - unsigned int bpmatch; }; /* I/O emulation helpers. Use non-standard calling conventions. */ @@ -XXX,XX +XXX,XX @@ static int cf_check read_io( if ( !guest_io_okay(port, bytes, curr, ctxt->regs) ) return X86EMUL_UNHANDLEABLE; - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes); if ( admin_io_okay(port, bytes, currd) ) { @@ -XXX,XX +XXX,XX @@ static int cf_check write_io( if ( !guest_io_okay(port, bytes, curr, ctxt->regs) ) return X86EMUL_UNHANDLEABLE; - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes); if ( admin_io_okay(port, bytes, currd) ) { @@ -XXX,XX +XXX,XX @@ static int cf_check rep_ins( return X86EMUL_EXCEPTION; } - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes_per_rep); while ( *reps < goal ) { @@ -XXX,XX +XXX,XX @@ static int cf_check rep_ins( ++*reps; - if ( poc->bpmatch || hypercall_preempt_check() ) + if ( poc->ctxt.retire.pending_dbg || hypercall_preempt_check() ) break; /* x86_emulate() clips the repetition count to ensure we don't wrap. */ @@ -XXX,XX +XXX,XX @@ static int cf_check rep_outs( return X86EMUL_EXCEPTION; } - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes_per_rep); while ( *reps < goal ) { @@ -XXX,XX +XXX,XX @@ static int cf_check rep_outs( ++*reps; - if ( poc->bpmatch || hypercall_preempt_check() ) + if ( poc->ctxt.retire.pending_dbg || hypercall_preempt_check() ) break; /* x86_emulate() clips the repetition count to ensure we don't wrap. */ @@ -XXX,XX +XXX,XX @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) case X86EMUL_DONE: ASSERT(!curr->arch.pv.trap_bounce.flags); - if ( ctxt.ctxt.retire.singlestep ) - ctxt.bpmatch |= DR_STEP; - - if ( ctxt.bpmatch ) + if ( ctxt.ctxt.retire.pending_dbg ) { - curr->arch.dr6 |= ctxt.bpmatch | DR_STATUS_RESERVED_ONE; + curr->arch.dr6 |= ctxt.ctxt.retire.pending_dbg | DR_STATUS_RESERVED_ONE; pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); } -- 2.30.2
All #DB exceptions result in an update of %dr6, but this isn't handled properly by Xen for any guest type. To start with, add a new pending_dbg field to x86_event, sharing storage with cr2, and using the Intel VMCS PENDING_DBG semantics. Also introduce a pv_inject_DB() wrapper use this field nicely. Remove all ad-hoc dr6 handling, leaving it to pv_inject_event() in most cases and using the new x86_merge_dr6() helper. In do_debug(), adjust dr6 manually only when a debugger is attached. This maintains the old behaviour. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> --- xen/arch/x86/include/asm/domain.h | 12 ++++++++++++ xen/arch/x86/pv/emul-priv-op.c | 5 +---- xen/arch/x86/pv/emulate.c | 6 ++---- xen/arch/x86/pv/ro-page-fault.c | 4 ++-- xen/arch/x86/pv/traps.c | 17 +++++++++++++---- xen/arch/x86/traps.c | 12 +++++++----- xen/arch/x86/x86_emulate/x86_emulate.h | 5 ++++- 7 files changed, 41 insertions(+), 20 deletions(-) diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/include/asm/domain.h +++ b/xen/arch/x86/include/asm/domain.h @@ -XXX,XX +XXX,XX @@ static inline void pv_inject_hw_exception(unsigned int vector, int errcode) pv_inject_event(&event); } +static inline void pv_inject_DB(unsigned long pending_dbg) +{ + struct x86_event event = { + .vector = X86_EXC_DB, + .type = X86_EVENTTYPE_HW_EXCEPTION, + .error_code = X86_EVENT_NO_EC, + .pending_dbg = pending_dbg, + }; + + pv_inject_event(&event); +} + static inline void pv_inject_page_fault(int errcode, unsigned long cr2) { const struct x86_event event = { diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -XXX,XX +XXX,XX @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) ASSERT(!curr->arch.pv.trap_bounce.flags); if ( ctxt.ctxt.retire.pending_dbg ) - { - curr->arch.dr6 |= ctxt.ctxt.retire.pending_dbg | DR_STATUS_RESERVED_ONE; - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); - } + pv_inject_DB(ctxt.ctxt.retire.pending_dbg); /* fall through */ case X86EMUL_RETRY: diff --git a/xen/arch/x86/pv/emulate.c b/xen/arch/x86/pv/emulate.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emulate.c +++ b/xen/arch/x86/pv/emulate.c @@ -XXX,XX +XXX,XX @@ void pv_emul_instruction_done(struct cpu_user_regs *regs, unsigned long rip) { regs->rip = rip; regs->eflags &= ~X86_EFLAGS_RF; + if ( regs->eflags & X86_EFLAGS_TF ) - { - current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE; - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); - } + pv_inject_DB(X86_DR6_BS); } uint64_t pv_get_reg(struct vcpu *v, unsigned int reg) diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/ro-page-fault.c +++ b/xen/arch/x86/pv/ro-page-fault.c @@ -XXX,XX +XXX,XX @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs) /* Fallthrough */ case X86EMUL_OKAY: - if ( ctxt.retire.singlestep ) - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); + if ( ctxt.retire.pending_dbg ) + pv_inject_DB(ctxt.retire.pending_dbg); /* Fallthrough */ case X86EMUL_RETRY: diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/traps.c +++ b/xen/arch/x86/pv/traps.c @@ -XXX,XX +XXX,XX @@ #include <xen/softirq.h> #include <asm/pv/trace.h> +#include <asm/debugreg.h> #include <asm/shared.h> #include <asm/traps.h> #include <irq_vectors.h> @@ -XXX,XX +XXX,XX @@ void pv_inject_event(const struct x86_event *event) tb->cs = ti->cs; tb->eip = ti->address; - if ( event->type == X86_EVENTTYPE_HW_EXCEPTION && - vector == X86_EXC_PF ) + switch ( vector | -(event->type == X86_EVENTTYPE_SW_INTERRUPT) ) { + case X86_EXC_PF: curr->arch.pv.ctrlreg[2] = event->cr2; arch_set_cr2(curr, event->cr2); @@ -XXX,XX +XXX,XX @@ void pv_inject_event(const struct x86_event *event) error_code |= PFEC_user_mode; trace_pv_page_fault(event->cr2, error_code); - } - else + break; + + case X86_EXC_DB: + curr->arch.dr6 = x86_merge_dr6(curr->domain->arch.cpu_policy, + curr->arch.dr6, event->pending_dbg); + /* Fallthrough */ + + default: trace_pv_trap(vector, regs->rip, use_error_code, error_code); + break; + } if ( use_error_code ) { diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -XXX,XX +XXX,XX @@ void do_device_not_available(struct cpu_user_regs *regs) /* SAF-1-safe */ void do_debug(struct cpu_user_regs *regs) { - unsigned long dr6; + unsigned long dr6, pending_dbg; struct vcpu *v = current; /* Stash dr6 as early as possible. */ @@ -XXX,XX +XXX,XX @@ void do_debug(struct cpu_user_regs *regs) return; } - /* Save debug status register where guest OS can peek at it */ - v->arch.dr6 |= (dr6 & ~X86_DR6_DEFAULT); - v->arch.dr6 &= (dr6 | ~X86_DR6_DEFAULT); + /* Flip dr6 to have positive polarity. */ + pending_dbg = dr6 ^ X86_DR6_DEFAULT; if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached ) { + /* Save debug status register where gdbsx can peek at it */ + v->arch.dr6 = x86_merge_dr6(v->domain->arch.cpu_policy, + v->arch.dr6, pending_dbg); domain_pause_for_debugger(); return; } - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); + pv_inject_DB(pending_dbg); } /* SAF-1-safe */ diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -XXX,XX +XXX,XX @@ struct x86_event { uint8_t type; /* X86_EVENTTYPE_* */ uint8_t insn_len; /* Instruction length */ int32_t error_code; /* X86_EVENT_NO_EC if n/a */ - unsigned long cr2; /* Only for X86_EXC_PF h/w exception */ + union { + unsigned long cr2; /* #PF */ + unsigned long pending_dbg; /* #DB (new DR6 bits, positive polarity) */ + }; }; /* -- 2.30.2
This time with a bit of sanity testing. See patches for details. Andrew Cooper (7): x86/emul: ASSERT that X86EMUL_DONE doesn't escape to callers x86/emul: Fix and extend #DB trap handling x86/pv: Fix the determiniation of whether to inject #DB x86/pv: Drop priv_op_ctxt.bpmatch and use pending_dbg instead x86: Introduce x86_merge_dr6() x86: Extend x86_event with a pending_dbg field x86/pv: Rewrite %dr6 handling xen/arch/x86/debug.c | 20 +++++++++++++++++ xen/arch/x86/include/asm/debugreg.h | 7 ++++++ xen/arch/x86/include/asm/domain.h | 18 ++++++++++++++-- xen/arch/x86/include/asm/hvm/hvm.h | 3 ++- xen/arch/x86/include/asm/x86-defns.h | 7 ++++++ xen/arch/x86/pv/emul-priv-op.c | 30 +++++++++++++------------- xen/arch/x86/pv/emulate.c | 9 ++++++-- xen/arch/x86/pv/ro-page-fault.c | 4 ++-- xen/arch/x86/pv/traps.c | 17 +++++++++++---- xen/arch/x86/traps.c | 23 ++++++++++---------- xen/arch/x86/x86_emulate/x86_emulate.c | 26 ++++++++++++++++------ xen/arch/x86/x86_emulate/x86_emulate.h | 19 ++++++++++++---- 12 files changed, 134 insertions(+), 49 deletions(-) -- 2.30.2
This property is far from clear. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> v2: * New --- xen/arch/x86/x86_emulate/x86_emulate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -XXX,XX +XXX,XX @@ int x86_emulate_wrapper( rc = x86_emulate(ctxt, ops); + /* + * X86EMUL_DONE is an internal signal in the emulator, and is not expected + * to ever escape out to callers. + */ + ASSERT(rc != X86EMUL_DONE); + /* * Most retire flags should only be set for successful instruction * emulation. -- 2.30.2
Lots of this is very very broken, but we need to start somewhere. First, the bugfix. Hooks which use X86EMUL_DONE to skip the general emulation still need to evaluate singlestep as part of completing the instruction. Defer the logic until X86EMUL_DONE has been converted to X86EMUL_OKAY. Second, the improvement. PENDING_DBG, INTERRUPTIBILITY and ACTIVITY are internal pipeline state which Intel exposed to software in the VMCS, and AMD exposed a subset of in the VMCB. Importantly, bits set in PENDING_DBG can survive across multiple instruction boundaries if e.g. delivery of #DB is delayed by a MovSS. For now, introduce a full pending_dbg field into the retire union. This keeps the sh_page_fault() and init_context() paths working but in due course the field will want to lose the "retire" infix. In addition, set singlestep into pending_dbg as appropriate. Leave the old singlestep bitfield in place until we can adjust the callers to the new scheme. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> v2: * Only evaluate singlestep on X86EMUL_OKAY, but do so after X86EMUL_DONE has been adjusted to X86EMUL_OKAY. * Adjust comments in light of X86EMUL_DONE not getting back to callers. --- xen/arch/x86/x86_emulate/x86_emulate.c | 20 +++++++++++++------- xen/arch/x86/x86_emulate/x86_emulate.h | 14 +++++++++++--- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -XXX,XX +XXX,XX @@ x86_emulate( if ( !mode_64bit() ) _regs.r(ip) = (uint32_t)_regs.r(ip); - /* Should a singlestep #DB be raised? */ - if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss ) - { - ctxt->retire.singlestep = true; - ctxt->retire.sti = false; - } - if ( rc != X86EMUL_DONE ) *ctxt->regs = _regs; else @@ -XXX,XX +XXX,XX @@ x86_emulate( rc = X86EMUL_OKAY; } + /* Should a singlestep #DB be raised? */ + if ( rc == X86EMUL_OKAY && singlestep ) + { + ctxt->retire.pending_dbg |= X86_DR6_BS; + + /* BROKEN - TODO, merge into pending_dbg. */ + if ( !ctxt->retire.mov_ss ) + { + ctxt->retire.singlestep = true; + ctxt->retire.sti = false; + } + } + ctxt->regs->eflags &= ~X86_EFLAGS_RF; done: diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -XXX,XX +XXX,XX @@ struct x86_emulate_ctxt /* Canonical opcode (see below) (valid only on X86EMUL_OKAY). */ unsigned int opcode; - /* Retirement state, set by the emulator (valid only on X86EMUL_OKAY). */ + /* + * Retirement state, set by the emulator (valid only on X86EMUL_OKAY). + * + * TODO: all this state should be input/output from the VMCS PENDING_DBG, + * INTERRUPTIBILITY and ACTIVITIY fields. + */ union { - uint8_t raw; + unsigned long raw; struct { + /* Accumulated %dr6 trap bits, positive polarity. */ + unsigned int pending_dbg; + bool hlt:1; /* Instruction HLTed. */ bool mov_ss:1; /* Instruction sets MOV-SS irq shadow. */ bool sti:1; /* Instruction sets STI irq shadow. */ bool unblock_nmi:1; /* Instruction clears NMI blocking. */ - bool singlestep:1; /* Singlestepping was active. */ + bool singlestep:1; /* Singlestepping was active. (TODO, merge into pending_dbg) */ }; } retire; -- 2.30.2
We long ago fixed the emulator to not inject exceptions behind our back. Therefore, assert that that a PV event (including interrupts, because that would be buggy too) isn't pending, rather than skipping the #DB injection if one is. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> v2: * Drop X86EMUL_DONE adjustment. --- xen/arch/x86/pv/emul-priv-op.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -XXX,XX +XXX,XX @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) switch ( rc ) { case X86EMUL_OKAY: + ASSERT(!curr->arch.pv.trap_bounce.flags); + if ( ctxt.ctxt.retire.singlestep ) ctxt.bpmatch |= DR_STEP; + if ( ctxt.bpmatch ) { curr->arch.dr6 |= ctxt.bpmatch | DR_STATUS_RESERVED_ONE; - if ( !(curr->arch.pv.trap_bounce.flags & TBF_EXCEPTION) ) - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); + pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); } + /* fall through */ case X86EMUL_RETRY: return EXCRET_fault_fixed; -- 2.30.2
With a full pending_dbg field in x86_emulate_ctxt, use it rather than using a local bpmatch field. This simplifies the X86EMUL_OKAY path as singlestep is already accumulated by x86_emulate() when appropriate. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> v2: * Tweak commit message to avoid referencing X86EMUL_DONE. --- xen/arch/x86/pv/emul-priv-op.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -XXX,XX +XXX,XX @@ struct priv_op_ctxt { unsigned long base, limit; } cs; char *io_emul_stub; - unsigned int bpmatch; }; /* I/O emulation helpers. Use non-standard calling conventions. */ @@ -XXX,XX +XXX,XX @@ static int cf_check read_io( if ( !guest_io_okay(port, bytes, curr, ctxt->regs) ) return X86EMUL_UNHANDLEABLE; - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes); if ( admin_io_okay(port, bytes, currd) ) { @@ -XXX,XX +XXX,XX @@ static int cf_check write_io( if ( !guest_io_okay(port, bytes, curr, ctxt->regs) ) return X86EMUL_UNHANDLEABLE; - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes); if ( admin_io_okay(port, bytes, currd) ) { @@ -XXX,XX +XXX,XX @@ static int cf_check rep_ins( return X86EMUL_EXCEPTION; } - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes_per_rep); while ( *reps < goal ) { @@ -XXX,XX +XXX,XX @@ static int cf_check rep_ins( ++*reps; - if ( poc->bpmatch || hypercall_preempt_check() ) + if ( poc->ctxt.retire.pending_dbg || hypercall_preempt_check() ) break; /* x86_emulate() clips the repetition count to ensure we don't wrap. */ @@ -XXX,XX +XXX,XX @@ static int cf_check rep_outs( return X86EMUL_EXCEPTION; } - poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep); + poc->ctxt.retire.pending_dbg |= + check_guest_io_breakpoint(curr, port, bytes_per_rep); while ( *reps < goal ) { @@ -XXX,XX +XXX,XX @@ static int cf_check rep_outs( ++*reps; - if ( poc->bpmatch || hypercall_preempt_check() ) + if ( poc->ctxt.retire.pending_dbg || hypercall_preempt_check() ) break; /* x86_emulate() clips the repetition count to ensure we don't wrap. */ @@ -XXX,XX +XXX,XX @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) case X86EMUL_OKAY: ASSERT(!curr->arch.pv.trap_bounce.flags); - if ( ctxt.ctxt.retire.singlestep ) - ctxt.bpmatch |= DR_STEP; - - if ( ctxt.bpmatch ) + if ( ctxt.ctxt.retire.pending_dbg ) { - curr->arch.dr6 |= ctxt.bpmatch | DR_STATUS_RESERVED_ONE; + curr->arch.dr6 |= ctxt.ctxt.retire.pending_dbg | DR_STATUS_RESERVED_ONE; pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); } -- 2.30.2
The current logic used to update %dr6 when injecting #DB is buggy. The SDM/APM documention on %dr6 updates is far from ideal, but does at least make clear that it's non-trivial. The actual behaviour is to overwrite B{0..3} and accumulate all other bits. Introduce x86_merge_dr6() to perform the operaton properly. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> v2: * Extend commit message --- xen/arch/x86/debug.c | 20 ++++++++++++++++++++ xen/arch/x86/include/asm/debugreg.h | 7 +++++++ xen/arch/x86/include/asm/x86-defns.h | 7 +++++++ 3 files changed, 34 insertions(+) diff --git a/xen/arch/x86/debug.c b/xen/arch/x86/debug.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/debug.c +++ b/xen/arch/x86/debug.c @@ -XXX,XX +XXX,XX @@ * Copyright (C) 2023 XenServer. */ #include <xen/kernel.h> +#include <xen/lib.h> #include <xen/lib/x86/cpu-policy.h> @@ -XXX,XX +XXX,XX @@ unsigned int x86_adj_dr6_rsvd(const struct cpu_policy *p, unsigned int dr6) return dr6; } +unsigned int x86_merge_dr6(const struct cpu_policy *p, unsigned int dr6, + unsigned int new) +{ + /* Flip dr6 to have positive polarity. */ + dr6 ^= X86_DR6_DEFAULT; + + /* Sanity check that only known values are passed in. */ + ASSERT(!(dr6 & ~X86_DR6_KNOWN_MASK)); + ASSERT(!(new & ~X86_DR6_KNOWN_MASK)); + + /* Breakpoint matches are overridden. All other bits accumulate. */ + dr6 = (dr6 & ~X86_DR6_BP_MASK) | new; + + /* Flip dr6 back to having default polarity. */ + dr6 ^= X86_DR6_DEFAULT; + + return x86_adj_dr6_rsvd(p, dr6); +} + unsigned int x86_adj_dr7_rsvd(const struct cpu_policy *p, unsigned int dr7) { unsigned int zeros = X86_DR7_ZEROS; diff --git a/xen/arch/x86/include/asm/debugreg.h b/xen/arch/x86/include/asm/debugreg.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/include/asm/debugreg.h +++ b/xen/arch/x86/include/asm/debugreg.h @@ -XXX,XX +XXX,XX @@ struct cpu_policy; unsigned int x86_adj_dr6_rsvd(const struct cpu_policy *p, unsigned int dr6); unsigned int x86_adj_dr7_rsvd(const struct cpu_policy *p, unsigned int dr7); +/* + * Merge new bits into dr6. 'new' is always given in positive polarity, + * matching the Intel VMCS PENDING_DBG semantics. + */ +unsigned int x86_merge_dr6(const struct cpu_policy *p, unsigned int dr6, + unsigned int new); + #endif /* _X86_DEBUGREG_H */ diff --git a/xen/arch/x86/include/asm/x86-defns.h b/xen/arch/x86/include/asm/x86-defns.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/include/asm/x86-defns.h +++ b/xen/arch/x86/include/asm/x86-defns.h @@ -XXX,XX +XXX,XX @@ #define X86_DR6_BT (_AC(1, UL) << 15) /* Task switch */ #define X86_DR6_RTM (_AC(1, UL) << 16) /* #DB/#BP in RTM region (INV) */ +#define X86_DR6_BP_MASK \ + (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3) + +#define X86_DR6_KNOWN_MASK \ + (X86_DR6_BP_MASK | X86_DR6_BLD | X86_DR6_BD | X86_DR6_BS | \ + X86_DR6_BT | X86_DR6_RTM) + #define X86_DR6_ZEROS _AC(0x00001000, UL) /* %dr6 bits forced to 0 */ #define X86_DR6_DEFAULT _AC(0xffff0ff0, UL) /* Default %dr6 value */ -- 2.30.2
... using the Intel VMCS PENDING_DBG semantics, and sharing storage with cr2. This requires working around anonymous union bugs in obsolete versions of GCC, which in turn needs to drop unnecessary const qualifiers. Also introduce a pv_inject_DB() wrapper use this field nicely. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> v2: * Split out of prior patch. --- xen/arch/x86/include/asm/domain.h | 18 ++++++++++++++++-- xen/arch/x86/include/asm/hvm/hvm.h | 3 ++- xen/arch/x86/x86_emulate/x86_emulate.h | 5 ++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/include/asm/domain.h +++ b/xen/arch/x86/include/asm/domain.h @@ -XXX,XX +XXX,XX @@ static inline void pv_inject_hw_exception(unsigned int vector, int errcode) pv_inject_event(&event); } +static inline void pv_inject_DB(unsigned long pending_dbg) +{ + struct x86_event event = { + .vector = X86_EXC_DB, + .type = X86_EVENTTYPE_HW_EXCEPTION, + .error_code = X86_EVENT_NO_EC, + }; + + event.pending_dbg = pending_dbg; + + pv_inject_event(&event); +} + static inline void pv_inject_page_fault(int errcode, unsigned long cr2) { - const struct x86_event event = { + struct x86_event event = { .vector = X86_EXC_PF, .type = X86_EVENTTYPE_HW_EXCEPTION, .error_code = errcode, - .cr2 = cr2, }; + event.cr2 = cr2; + pv_inject_event(&event); } diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/include/asm/hvm/hvm.h +++ b/xen/arch/x86/include/asm/hvm/hvm.h @@ -XXX,XX +XXX,XX @@ static inline void hvm_inject_page_fault(int errcode, unsigned long cr2) .vector = X86_EXC_PF, .type = X86_EVENTTYPE_HW_EXCEPTION, .error_code = errcode, - .cr2 = cr2, }; + event.cr2 = cr2; + hvm_inject_event(&event); } diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -XXX,XX +XXX,XX @@ struct x86_event { uint8_t type; /* X86_EVENTTYPE_* */ uint8_t insn_len; /* Instruction length */ int32_t error_code; /* X86_EVENT_NO_EC if n/a */ - unsigned long cr2; /* Only for X86_EXC_PF h/w exception */ + union { + unsigned long cr2; /* #PF */ + unsigned long pending_dbg; /* #DB (new DR6 bits, positive polarity) */ + }; }; /* -- 2.30.2
All #DB exceptions result in an update of %dr6, but this isn't handled properly by Xen for any guest type. Remove all ad-hoc dr6 handling, leaving it to pv_inject_event() in most cases and using the new x86_merge_dr6() helper. In do_debug(), swap the dr6 to pending_dbg in order to operate entirely with positive polarity. Among other things, this helps spot RTM/BLD in the diagnostic message. Drop the unconditional v->arch.dr6 adjustment. pv_inject_event() performs the adjustment in the common case, but retain the prior behaviour if a debugger is attached. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Wei Liu <wl@xen.org> CC: Jinoh Kang <jinoh.kang.kr@gmail.com> v2: * Split pieces out into an earlier patch. * Extend do_debug() to use pending_dbg entirely, rather than have the same variable used with different polarity at different times. --- xen/arch/x86/pv/emul-priv-op.c | 5 +---- xen/arch/x86/pv/emulate.c | 9 +++++++-- xen/arch/x86/pv/ro-page-fault.c | 4 ++-- xen/arch/x86/pv/traps.c | 17 +++++++++++++---- xen/arch/x86/traps.c | 23 +++++++++++------------ 5 files changed, 34 insertions(+), 24 deletions(-) diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -XXX,XX +XXX,XX @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) ASSERT(!curr->arch.pv.trap_bounce.flags); if ( ctxt.ctxt.retire.pending_dbg ) - { - curr->arch.dr6 |= ctxt.ctxt.retire.pending_dbg | DR_STATUS_RESERVED_ONE; - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); - } + pv_inject_DB(ctxt.ctxt.retire.pending_dbg); /* fall through */ case X86EMUL_RETRY: diff --git a/xen/arch/x86/pv/emulate.c b/xen/arch/x86/pv/emulate.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/emulate.c +++ b/xen/arch/x86/pv/emulate.c @@ -XXX,XX +XXX,XX @@ void pv_emul_instruction_done(struct cpu_user_regs *regs, unsigned long rip) { regs->rip = rip; regs->eflags &= ~X86_EFLAGS_RF; + if ( regs->eflags & X86_EFLAGS_TF ) { - current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE; - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); + /* + * TODO, this should generally use TF from the start of the + * instruction. It's only a latent bug for now, as this path isn't + * used for any instruction which modifies eflags. + */ + pv_inject_DB(X86_DR6_BS); } } diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/ro-page-fault.c +++ b/xen/arch/x86/pv/ro-page-fault.c @@ -XXX,XX +XXX,XX @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs) /* Fallthrough */ case X86EMUL_OKAY: - if ( ctxt.retire.singlestep ) - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); + if ( ctxt.retire.pending_dbg ) + pv_inject_DB(ctxt.retire.pending_dbg); /* Fallthrough */ case X86EMUL_RETRY: diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/pv/traps.c +++ b/xen/arch/x86/pv/traps.c @@ -XXX,XX +XXX,XX @@ #include <xen/softirq.h> #include <asm/pv/trace.h> +#include <asm/debugreg.h> #include <asm/shared.h> #include <asm/traps.h> #include <irq_vectors.h> @@ -XXX,XX +XXX,XX @@ void pv_inject_event(const struct x86_event *event) tb->cs = ti->cs; tb->eip = ti->address; - if ( event->type == X86_EVENTTYPE_HW_EXCEPTION && - vector == X86_EXC_PF ) + switch ( vector | -(event->type == X86_EVENTTYPE_SW_INTERRUPT) ) { + case X86_EXC_PF: curr->arch.pv.ctrlreg[2] = event->cr2; arch_set_cr2(curr, event->cr2); @@ -XXX,XX +XXX,XX @@ void pv_inject_event(const struct x86_event *event) error_code |= PFEC_user_mode; trace_pv_page_fault(event->cr2, error_code); - } - else + break; + + case X86_EXC_DB: + curr->arch.dr6 = x86_merge_dr6(curr->domain->arch.cpu_policy, + curr->arch.dr6, event->pending_dbg); + /* Fallthrough */ + + default: trace_pv_trap(vector, regs->rip, use_error_code, error_code); + break; + } if ( use_error_code ) { diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -XXX,XX +XXX,XX @@ void do_device_not_available(struct cpu_user_regs *regs) /* SAF-1-safe */ void do_debug(struct cpu_user_regs *regs) { - unsigned long dr6; + unsigned long pending_dbg; struct vcpu *v = current; - /* Stash dr6 as early as possible. */ - dr6 = read_debugreg(6); + /* Stash dr6 as early as possible, operating with positive polarity. */ + pending_dbg = read_debugreg(6) ^ X86_DR6_DEFAULT; /* * At the time of writing (March 2018), on the subject of %dr6: @@ -XXX,XX +XXX,XX @@ void do_debug(struct cpu_user_regs *regs) * If however we do, safety measures need to be enacted. Use a big * hammer and clear all debug settings. */ - if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) ) + if ( pending_dbg & X86_DR6_BP_MASK ) { unsigned int bp, dr7 = read_debugreg(7); for ( bp = 0; bp < 4; ++bp ) { - if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */ + if ( (pending_dbg & (1u << bp)) && /* Breakpoint triggered? */ (dr7 & (3u << (bp * DR_ENABLE_SIZE))) && /* Enabled? */ ((dr7 & (3u << ((bp * DR_CONTROL_SIZE) + /* Insn? */ DR_CONTROL_SHIFT))) == DR_RW_EXECUTE) ) @@ -XXX,XX +XXX,XX @@ void do_debug(struct cpu_user_regs *regs) * so ensure the message is ratelimited. */ gprintk(XENLOG_WARNING, - "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 %lx\n", + "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, pending_dbg %lx\n", regs->cs, _p(regs->rip), _p(regs->rip), - regs->ss, _p(regs->rsp), dr6); + regs->ss, _p(regs->rsp), pending_dbg); return; } - /* Save debug status register where guest OS can peek at it */ - v->arch.dr6 |= (dr6 & ~X86_DR6_DEFAULT); - v->arch.dr6 &= (dr6 | ~X86_DR6_DEFAULT); - if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached ) { + /* Save debug status register where gdbsx can peek at it */ + v->arch.dr6 = x86_merge_dr6(v->domain->arch.cpu_policy, + v->arch.dr6, pending_dbg); domain_pause_for_debugger(); return; } - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); + pv_inject_DB(pending_dbg); } /* SAF-1-safe */ -- 2.30.2