patchew
Login
Moves the sti so that IRQs are re-enabled directly after the %cr2 read inside the #PF handler and, for all other exceptions, immediately after the #PF dispatch. While in the area, remove redundant q suffix from a movq in entry.S. Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> --- Got lost alongside other patches. Here's the promised v2. pipeline: https://gitlab.com/xen-project/people/agvallejo/xen/-/pipelines/1458699639 v1: https://lore.kernel.org/xen-devel/20240911145823.12066-1-alejandro.vallejo@cloud.com/ v2: * (cosmetic), add whitespace after comma * Added ASSERT(!local_irq_is_enabled()) to do_page_fault() * Only re-enable interrupts if they were enabled in the interrupted context. --- xen/arch/x86/traps.c | 8 ++++++++ xen/arch/x86/x86_64/entry.S | 20 ++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -XXX,XX +XXX,XX @@ void asmlinkage do_page_fault(struct cpu_user_regs *regs) addr = read_cr2(); + /* + * Don't re-enable interrupts if we were running an IRQ-off region when + * we hit the page fault, or we'll break that code. + */ + ASSERT(!local_irq_is_enabled()); + if ( regs->flags & X86_EFLAGS_IF ) + local_irq_enable(); + /* fixup_page_fault() might change regs->error_code, so cache it here. 
*/ error_code = regs->error_code; diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -XXX,XX +XXX,XX @@ handle_exception_saved: #elif !defined(CONFIG_PV) ASSERT_CONTEXT_IS_XEN #endif /* CONFIG_PV */ - sti -1: movq %rsp,%rdi - movzbl UREGS_entry_vector(%rsp),%eax +.Ldispatch_handlers: + mov %rsp, %rdi + movzbl UREGS_entry_vector(%rsp), %eax #ifdef CONFIG_PERF_COUNTERS lea per_cpu__perfcounters(%rip), %rcx add STACK_CPUINFO_FIELD(per_cpu_offset)(%r14), %rcx @@ -XXX,XX +XXX,XX @@ handle_exception_saved: jmp .L_exn_dispatch_done; \ .L_ ## vec ## _done: + /* + * IRQs kept off to derisk being hit by a nested interrupt before + * reading %cr2. Otherwise a page fault in the nested interrupt handler + * would corrupt %cr2. + */ DISPATCH(X86_EXC_PF, do_page_fault) + + /* Only re-enable IRQs if they were active before taking the fault */ + testb $X86_EFLAGS_IF >> 8, UREGS_eflags + 1(%rsp) + jz 1f + sti +1: + DISPATCH(X86_EXC_GP, do_general_protection) DISPATCH(X86_EXC_UD, do_invalid_op) DISPATCH(X86_EXC_NM, do_device_not_available) @@ -XXX,XX +XXX,XX @@ exception_with_ints_disabled: movq %rsp,%rdi call search_pre_exception_table testq %rax,%rax # no fixup code for faulting EIP? - jz 1b + jz .Ldispatch_handlers movq %rax,UREGS_rip(%rsp) # fixup regular stack #ifdef CONFIG_XEN_SHSTK -- 2.46.0
Hitting a page fault clobbers %cr2, so if a page fault is handled while handling a previous page fault then %cr2 will hold the address of the latter fault rather than the former. This patch makes the page fault path delay re-enabling IRQs until %cr2 has been read in order to ensure it stays consistent. A similar argument holds in additional cases, but they happen to be safe: * %dr6 inside #DB: Safe because IST exceptions don't re-enable IRQs. * MSR_XFD_ERR inside #NM: Safe because AMX isn't used in the #NM handler. While in the area, remove redundant q suffix from a movq in entry.S and add space after the comma. Fixes: a4cd20a19073 ("[XEN] 'd' key dumps both host and guest state.") Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> --- v3: * s/dispatch_handlers/dispatch_exceptions/ * Updated commit message, spelling out the state of #DB and #NM, and stating an existing race with debug keys. --- xen/arch/x86/traps.c | 8 ++++++++ xen/arch/x86/x86_64/entry.S | 20 ++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -XXX,XX +XXX,XX @@ void asmlinkage do_page_fault(struct cpu_user_regs *regs) addr = read_cr2(); + /* + * Don't re-enable interrupts if we were running an IRQ-off region when + * we hit the page fault, or we'll break that code. + */ + ASSERT(!local_irq_is_enabled()); + if ( regs->flags & X86_EFLAGS_IF ) + local_irq_enable(); + /* fixup_page_fault() might change regs->error_code, so cache it here. 
*/ error_code = regs->error_code; diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -XXX,XX +XXX,XX @@ handle_exception_saved: #elif !defined(CONFIG_PV) ASSERT_CONTEXT_IS_XEN #endif /* CONFIG_PV */ - sti -1: movq %rsp,%rdi - movzbl UREGS_entry_vector(%rsp),%eax +.Ldispatch_exceptions: + mov %rsp, %rdi + movzbl UREGS_entry_vector(%rsp), %eax #ifdef CONFIG_PERF_COUNTERS lea per_cpu__perfcounters(%rip), %rcx add STACK_CPUINFO_FIELD(per_cpu_offset)(%r14), %rcx @@ -XXX,XX +XXX,XX @@ handle_exception_saved: jmp .L_exn_dispatch_done; \ .L_ ## vec ## _done: + /* + * IRQs kept off to derisk being hit by a nested interrupt before + * reading %cr2. Otherwise a page fault in the nested interrupt handler + * would corrupt %cr2. + */ DISPATCH(X86_EXC_PF, do_page_fault) + + /* Only re-enable IRQs if they were active before taking the fault */ + testb $X86_EFLAGS_IF >> 8, UREGS_eflags + 1(%rsp) + jz 1f + sti +1: + DISPATCH(X86_EXC_GP, do_general_protection) DISPATCH(X86_EXC_UD, do_invalid_op) DISPATCH(X86_EXC_NM, do_device_not_available) @@ -XXX,XX +XXX,XX @@ exception_with_ints_disabled: movq %rsp,%rdi call search_pre_exception_table testq %rax,%rax # no fixup code for faulting EIP? - jz 1b + jz .Ldispatch_exceptions movq %rax,UREGS_rip(%rsp) # fixup regular stack #ifdef CONFIG_XEN_SHSTK -- 2.46.0