Separate out the actual vsyscall emulation from the #PF specific
handling in preparation for the upcoming #GP emulation.
No functional change intended.
Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
---
v10:
- Modify the code flow slightly to make it easier to follow.
---
arch/x86/entry/vsyscall/vsyscall_64.c | 63 ++++++++++++++-------------
arch/x86/include/asm/vsyscall.h | 7 ++-
arch/x86/mm/fault.c | 2 +-
3 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 6e6c0a740837..4c3f49bf39e6 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -112,43 +112,13 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
}
}
-bool emulate_vsyscall(unsigned long error_code,
- struct pt_regs *regs, unsigned long address)
+static bool __emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
long ret;
unsigned long orig_dx;
- /* Write faults or kernel-privilege faults never get fixed up. */
- if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
- return false;
-
- /*
- * Assume that faults at regs->ip are because of an
- * instruction fetch. Return early and avoid
- * emulation for faults during data accesses:
- */
- if (address != regs->ip) {
- /* Failed vsyscall read */
- if (vsyscall_mode == EMULATE)
- return false;
-
- /*
- * User code tried and failed to read the vsyscall page.
- */
- warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
- return false;
- }
-
- /*
- * X86_PF_INSTR is only set when NX is supported. When
- * available, use it to double-check that the emulation code
- * is only being used for instruction fetches:
- */
- if (cpu_feature_enabled(X86_FEATURE_NX))
- WARN_ON_ONCE(!(error_code & X86_PF_INSTR));
-
/*
* No point in checking CS -- the only way to get here is a user mode
* trap to a high address, which means that we're in 64-bit user code.
@@ -281,6 +251,37 @@ bool emulate_vsyscall(unsigned long error_code,
return true;
}
+bool emulate_vsyscall_pf(unsigned long error_code, struct pt_regs *regs,
+ unsigned long address)
+{
+ /* Write faults or kernel-privilege faults never get fixed up. */
+ if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
+ return false;
+
+ /*
+ * Assume that faults at regs->ip are because of an instruction
+ * fetch. Return early and avoid emulation for faults during
+ * data accesses:
+ */
+ if (address != regs->ip) {
+ /* User code tried and failed to read the vsyscall page. */
+ if (vsyscall_mode != EMULATE)
+ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
+
+ return false;
+ }
+
+ /*
+ * X86_PF_INSTR is only set when NX is supported. When
+ * available, use it to double-check that the emulation code
+ * is only being used for instruction fetches:
+ */
+ if (cpu_feature_enabled(X86_FEATURE_NX))
+ WARN_ON_ONCE(!(error_code & X86_PF_INSTR));
+
+ return __emulate_vsyscall(regs, address);
+}
+
/*
* A pseudo VMA to allow ptrace access for the vsyscall page. This only
* covers the 64bit vsyscall page now. 32bit has a real VMA now and does
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 472f0263dbc6..f34902364972 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -14,12 +14,11 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
* Called on instruction fetch fault in vsyscall page.
* Returns true if handled.
*/
-extern bool emulate_vsyscall(unsigned long error_code,
- struct pt_regs *regs, unsigned long address);
+bool emulate_vsyscall_pf(unsigned long error_code, struct pt_regs *regs, unsigned long address);
#else
static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(unsigned long error_code,
- struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall_pf(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
return false;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 998bd807fc7b..fbcc2da75fd6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1316,7 +1316,7 @@ void do_user_addr_fault(struct pt_regs *regs,
* to consider the PF_PK bit.
*/
if (is_vsyscall_vaddr(address)) {
- if (emulate_vsyscall(error_code, regs, address))
+ if (emulate_vsyscall_pf(error_code, regs, address))
return;
}
#endif
--
2.43.0
On Mon, 2025-10-06 at 23:51 -0700, Sohil Mehta wrote:
> Separate out the actual vsyscall emulation from the #PF specific
> handling in preparation for the upcoming #GP emulation.
>
> No functional change intended.
>
> Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
> Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
> ---
> v10:
> - Modify the code flow slightly to make it easier to follow.
> ---
> arch/x86/entry/vsyscall/vsyscall_64.c | 63 ++++++++++++++-------------
> arch/x86/include/asm/vsyscall.h | 7 ++-
> arch/x86/mm/fault.c | 2 +-
> 3 files changed, 36 insertions(+), 36 deletions(-)
>
> diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
> index 6e6c0a740837..4c3f49bf39e6 100644
> --- a/arch/x86/entry/vsyscall/vsyscall_64.c
> +++ b/arch/x86/entry/vsyscall/vsyscall_64.c
> @@ -112,43 +112,13 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
> }
> }
>
> -bool emulate_vsyscall(unsigned long error_code,
> - struct pt_regs *regs, unsigned long address)
> +static bool __emulate_vsyscall(struct pt_regs *regs, unsigned long address)
> {
> unsigned long caller;
> int vsyscall_nr, syscall_nr, tmp;
> long ret;
> unsigned long orig_dx;
>
> - /* Write faults or kernel-privilege faults never get fixed up. */
> - if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
> - return false;
> -
> - /*
> - * Assume that faults at regs->ip are because of an
> - * instruction fetch. Return early and avoid
> - * emulation for faults during data accesses:
> - */
> - if (address != regs->ip) {
> - /* Failed vsyscall read */
> - if (vsyscall_mode == EMULATE)
> - return false;
> -
> - /*
> - * User code tried and failed to read the vsyscall page.
> - */
> - warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
> - return false;
> - }
> -
> - /*
> - * X86_PF_INSTR is only set when NX is supported. When
> - * available, use it to double-check that the emulation code
> - * is only being used for instruction fetches:
> - */
> - if (cpu_feature_enabled(X86_FEATURE_NX))
> - WARN_ON_ONCE(!(error_code & X86_PF_INSTR));
> -
> /*
> * No point in checking CS -- the only way to get here is a user mode
> * trap to a high address, which means that we're in 64-bit user code.

I don't know. Is this as true any more? We are now sometimes guessing based on
regs->ip of a #GP. What if the kernel accidentally tries to jump to the vsyscall
address? Then we are reading the kernel stack and strange things. Maybe it's
worth replacing the comment with a check? Feel free to call this paranoid.

> @@ -281,6 +251,37 @@ bool emulate_vsyscall(unsigned long error_code,
> return true;
> }
>
> +bool emulate_vsyscall_pf(unsigned long error_code, struct pt_regs *regs,
> + unsigned long address)
> +{
> + /* Write faults or kernel-privilege faults never get fixed up. */
> + if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
> + return false;
> +
> + /*
> + * Assume that faults at regs->ip are because of an instruction
> + * fetch. Return early and avoid emulation for faults during
> + * data accesses:
> + */
> + if (address != regs->ip) {
> + /* User code tried and failed to read the vsyscall page. */
> + if (vsyscall_mode != EMULATE)
> + warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
> +
> + return false;
> + }
> +
> + /*
> + * X86_PF_INSTR is only set when NX is supported. When
> + * available, use it to double-check that the emulation code
> + * is only being used for instruction fetches:
> + */
> + if (cpu_feature_enabled(X86_FEATURE_NX))
> + WARN_ON_ONCE(!(error_code & X86_PF_INSTR));
> +
> + return __emulate_vsyscall(regs, address);
> +}
> +
> /*
> * A pseudo VMA to allow ptrace access for the vsyscall page. This only
> * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
> diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
> index 472f0263dbc6..f34902364972 100644
> --- a/arch/x86/include/asm/vsyscall.h
> +++ b/arch/x86/include/asm/vsyscall.h
> @@ -14,12 +14,11 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
> * Called on instruction fetch fault in vsyscall page.
> * Returns true if handled.
> */
> -extern bool emulate_vsyscall(unsigned long error_code,
> - struct pt_regs *regs, unsigned long address);
> +bool emulate_vsyscall_pf(unsigned long error_code, struct pt_regs *regs, unsigned long address);
> #else
> static inline void map_vsyscall(void) {}
> -static inline bool emulate_vsyscall(unsigned long error_code,
> - struct pt_regs *regs, unsigned long address)
> +static inline bool emulate_vsyscall_pf(unsigned long error_code,
> + struct pt_regs *regs, unsigned long address)
> {
> return false;
> }
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 998bd807fc7b..fbcc2da75fd6 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -1316,7 +1316,7 @@ void do_user_addr_fault(struct pt_regs *regs,
> * to consider the PF_PK bit.
> */
> if (is_vsyscall_vaddr(address)) {
> - if (emulate_vsyscall(error_code, regs, address))
> + if (emulate_vsyscall_pf(error_code, regs, address))
> return;
> }
> #endif
On 10/7/25 11:37, Edgecombe, Rick P wrote:
>> /*
>> * No point in checking CS -- the only way to get here is a user mode
>> * trap to a high address, which means that we're in 64-bit user code.
> I don't know. Is this as true any more? We are now sometimes guessing based on
> regs->ip of a #GP. What if the kernel accidentally tries to jump to the vsyscall
> address? Then we are reading the kernel stack and strange things. Maybe it's
> worth replacing the comment with a check? Feel free to call this paranoid.

The first check in emulate_vsyscall() is:

	/* Write faults or kernel-privilege faults never get fixed up. */
	if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
		return false;

If the kernel jumped to the vsyscall page, it would end up there, return
false, and never reach the code near the "No point in checking CS" comment.

Right? Or am I misunderstanding the scenario you're calling out?

If I'm understanding it right, I'd be a bit reluctant to add a CS check
as well.
On Tue, 2025-10-07 at 11:48 -0700, Dave Hansen wrote:
> On 10/7/25 11:37, Edgecombe, Rick P wrote:
> > > /*
> > > * No point in checking CS -- the only way to get here is a user mode
> > > * trap to a high address, which means that we're in 64-bit user code.
> > I don't know. Is this as true any more? We are now sometimes guessing based on
> > regs->ip of a #GP. What if the kernel accidentally tries to jump to the vsyscall
> > address? Then we are reading the kernel stack and strange things. Maybe it's
> > worth replacing the comment with a check? Feel free to call this paranoid.
>
> The first check in emulate_vsyscall() is:
>
> 	/* Write faults or kernel-privilege faults never get fixed up. */
> 	if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
> 		return false;
>
> If the kernel jumped to the vsyscall page, it would end up there, return
> false, and never reach the code near the "No point in checking CS" comment.
>
> Right? Or am I misunderstanding the scenario you're calling out?
>
> If I'm understanding it right, I'd be a bit reluctant to add a CS check
> as well.

Sorry, I could have been clearer. Yes, I assumed that the comment was talking
about the check you quoted. But I'm looking at this with the later patches
applied. The following patches (which don't include that hunk) add another
call site:

	bool emulate_vsyscall_gp(struct pt_regs *regs)
	{
		if (!cpu_feature_enabled(X86_FEATURE_LASS))
			return false;

		/* Emulate only if the RIP points to the vsyscall address */
		if (!is_vsyscall_vaddr(regs->ip))
			return false;

		return __emulate_vsyscall(regs, regs->ip);
	}

If indeed we should add a check, it should probably go in one of the later
patches and not this one.
© 2016 - 2025 Red Hat, Inc.