Hitting a page fault clobbers %cr2, so if a page fault is handled while
handling a previous page fault then %cr2 will hold the address of the
latter fault rather than the former. In particular, if a debug key
handler happens to trigger during #PF and before %cr2 is read, and that
handler itself encounters a #PF, then %cr2 will be corrupt for the outer #PF
handler.
This patch makes the page fault path delay re-enabling IRQs until %cr2
has been read in order to ensure it stays consistent.
A similar argument holds in additional cases, but they happen to be safe:
* %dr6 inside #DB: Safe because IST exceptions don't re-enable IRQs.
* MSR_XFD_ERR inside #NM: Safe because AMX isn't used in #NM handler.
While in the area, remove the redundant q suffix from a movq in entry.S and
add the missing space after the comma in the touched instructions.
Fixes: a4cd20a19073 ("[XEN] 'd' key dumps both host and guest state.")
Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
---
v3:
* s/dispatch_handlers/dispatch_exceptions/
* Updated commit message, spelling out the state of #DB and #NM, and
stating an existing race with debug keys.
---
xen/arch/x86/traps.c | 8 ++++++++
xen/arch/x86/x86_64/entry.S | 20 ++++++++++++++++----
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 708136f62558..a9c2c607eb08 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1600,6 +1600,14 @@ void asmlinkage do_page_fault(struct cpu_user_regs *regs)
addr = read_cr2();
+ /*
+ * Don't re-enable interrupts if we were running in an IRQ-off region when
+ * we hit the page fault, or we'll break that code.
+ */
+ ASSERT(!local_irq_is_enabled());
+ if ( regs->flags & X86_EFLAGS_IF )
+ local_irq_enable();
+
/* fixup_page_fault() might change regs->error_code, so cache it here. */
error_code = regs->error_code;
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index b8482de8ee5b..9b0cdb76408b 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -844,9 +844,9 @@ handle_exception_saved:
#elif !defined(CONFIG_PV)
ASSERT_CONTEXT_IS_XEN
#endif /* CONFIG_PV */
- sti
-1: movq %rsp,%rdi
- movzbl UREGS_entry_vector(%rsp),%eax
+.Ldispatch_exceptions:
+ mov %rsp, %rdi
+ movzbl UREGS_entry_vector(%rsp), %eax
#ifdef CONFIG_PERF_COUNTERS
lea per_cpu__perfcounters(%rip), %rcx
add STACK_CPUINFO_FIELD(per_cpu_offset)(%r14), %rcx
@@ -866,7 +866,19 @@ handle_exception_saved:
jmp .L_exn_dispatch_done; \
.L_ ## vec ## _done:
+ /*
+ * IRQs kept off to derisk being hit by a nested interrupt before
+ * reading %cr2. Otherwise a page fault in the nested interrupt handler
+ * would corrupt %cr2.
+ */
DISPATCH(X86_EXC_PF, do_page_fault)
+
+ /* Only re-enable IRQs if they were active before taking the fault */
+ testb $X86_EFLAGS_IF >> 8, UREGS_eflags + 1(%rsp)
+ jz 1f
+ sti
+1:
+
DISPATCH(X86_EXC_GP, do_general_protection)
DISPATCH(X86_EXC_UD, do_invalid_op)
DISPATCH(X86_EXC_NM, do_device_not_available)
@@ -911,7 +923,7 @@ exception_with_ints_disabled:
movq %rsp,%rdi
call search_pre_exception_table
testq %rax,%rax # no fixup code for faulting EIP?
- jz 1b
+ jz .Ldispatch_exceptions
movq %rax,UREGS_rip(%rsp) # fixup regular stack
#ifdef CONFIG_XEN_SHSTK
--
2.46.0