We will want to use that value for call trace generation, and likely
also to eliminate the somewhat fragile shadow stack searching done in
fixup_exception_return(). For those purposes, guest-only entry points do
not need to record that value.
To keep the saving code simple, record our own SSP that corresponds to
an exception frame, pointing to the top of the shadow stack counterpart
of what the CPU has saved on the regular stack. Consuming code can then
work its way from there.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to
the error code isn't entirely nice; putting it ahead of %r15 would entail
other changes, though. An option may be to not make SSP handling part of
the macros in the first place. Thoughts?
For POP_GPRS, does it really matter that it doesn't alter EFLAGS? Neither
of the two current uses relies on it, and without that requirement we
could use ADD in place of LEA. (Of course there are also POP-based ways of
getting rid of the SSP slot.)
---
v2: Add comment ahead of SAVE_ALL. Add comma between its parameters.
Re-base.
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -103,7 +103,7 @@ __UNLIKELY_END(nsvm_hap)
vmrun
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_CURRENT(bx)
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -22,7 +22,7 @@
#include <asm/page.h>
FUNC(vmx_asm_vmexit_handler)
- SAVE_ALL
+ SAVE_ALL ssp=0
mov %cr2,%rax
GET_CURRENT(bx)
@@ -171,7 +171,7 @@ UNLIKELY_END(realmode)
.Lvmx_vmentry_fail:
sti
- SAVE_ALL
+ SAVE_ALL ssp=0
/*
* SPEC_CTRL_ENTRY notes
--- a/xen/arch/x86/include/asm/asm_defns.h
+++ b/xen/arch/x86/include/asm/asm_defns.h
@@ -219,7 +219,11 @@ static always_inline void stac(void)
#endif
#ifdef __ASSEMBLER__
-.macro SAVE_ALL compat=0
+/*
+ * Use sites may override ssp to 0. It should never be overridden to 1.
+ * NB: compat=1 implies ssp=0.
+ */
+.macro SAVE_ALL compat=0, ssp=IS_ENABLED(CONFIG_XEN_SHSTK)
addq $-(UREGS_error_code-UREGS_r15), %rsp
cld
movq %rdi,UREGS_rdi(%rsp)
@@ -233,6 +237,9 @@ static always_inline void stac(void)
movq %rax,UREGS_rax(%rsp)
xor %eax, %eax
.if !\compat
+.if \ssp
+ rdsspq %rcx
+.endif
movq %r8,UREGS_r8(%rsp)
movq %r9,UREGS_r9(%rsp)
movq %r10,UREGS_r10(%rsp)
@@ -262,6 +269,9 @@ static always_inline void stac(void)
xor %r13d, %r13d
xor %r14d, %r14d
xor %r15d, %r15d
+#ifdef CONFIG_XEN_SHSTK
+ mov %rcx, UREGS_entry_ssp(%rsp)
+#endif
.endm
#define LOAD_ONE_REG(reg, compat) \
@@ -313,9 +323,14 @@ static always_inline void stac(void)
.endm
/*
- * Push and clear GPRs
+ * Push and clear GPRs.
+ *
+ * Use sites may override ssp to 0. It should never be overridden to 1.
*/
-.macro PUSH_AND_CLEAR_GPRS
+.macro PUSH_AND_CLEAR_GPRS ssp=IS_ENABLED(CONFIG_XEN_SHSTK)
+#ifdef CONFIG_XEN_SHSTK
+ push $0
+#endif
push %rdi
xor %edi, %edi
push %rsi
@@ -326,6 +341,9 @@ static always_inline void stac(void)
xor %ecx, %ecx
push %rax
xor %eax, %eax
+ .if \ssp
+ rdsspq %rcx
+ .endif
push %r8
xor %r8d, %r8d
push %r9
@@ -352,6 +370,9 @@ static always_inline void stac(void)
xor %r14d, %r14d
push %r15
xor %r15d, %r15d
+ .if \ssp
+ mov %rcx, UREGS_entry_ssp(%rsp)
+ .endif
.endm
/*
@@ -373,6 +394,9 @@ static always_inline void stac(void)
pop %rdx
pop %rsi
pop %rdi
+#ifdef CONFIG_XEN_SHSTK
+ lea 8(%rsp), %rsp
+#endif
.endm
#ifdef CONFIG_PV32
--- a/xen/arch/x86/include/asm/cpu-user-regs.h
+++ b/xen/arch/x86/include/asm/cpu-user-regs.h
@@ -27,6 +27,15 @@ struct cpu_user_regs
union { uint64_t rsi; uint32_t esi; uint16_t si; uint8_t sil; };
union { uint64_t rdi; uint32_t edi; uint16_t di; uint8_t dil; };
+#ifdef CONFIG_XEN_SHSTK
+ /*
+ * This points _at_ the corresponding shadow stack frame; it is _not_ the
+ * outer context's SSP. That, if the outer context has CET-SS enabled,
+ * is stored in the top slot of the pointed to shadow stack frame.
+ */
+ uint64_t entry_ssp;
+#endif
+
/*
* During IDT delivery for exceptions with an error code, hardware pushes
* to this point. Entry_vector is filled in by software.
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -53,6 +53,9 @@ void __dummy__(void)
OFFSET(UREGS_eflags, struct cpu_user_regs, rflags);
OFFSET(UREGS_rsp, struct cpu_user_regs, rsp);
OFFSET(UREGS_ss, struct cpu_user_regs, ss);
+#ifdef CONFIG_XEN_SHSTK
+ OFFSET(UREGS_entry_ssp, struct cpu_user_regs, entry_ssp);
+#endif
DEFINE(UREGS_kernel_sizeof, sizeof(struct cpu_user_regs));
BLANK();
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -275,7 +275,7 @@ FUNC(lstar_enter)
pushq $0
BUILD_BUG_ON(TRAP_syscall & 0xff)
movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
@@ -315,7 +315,7 @@ FUNC(cstar_enter)
pushq $0
BUILD_BUG_ON(TRAP_syscall & 0xff)
movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
@@ -359,7 +359,7 @@ LABEL(sysenter_eflags_saved, 0)
pushq $0
BUILD_BUG_ON(TRAP_syscall & 0xff)
movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
@@ -415,7 +415,7 @@ FUNC(entry_int80)
ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
pushq $0
movb $0x80, EFRAME_entry_vector(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
--- a/xen/arch/x86/x86_64/entry-fred.S
+++ b/xen/arch/x86/x86_64/entry-fred.S
@@ -10,7 +10,7 @@
/* The Ring3 entry point is required to be 4k aligned. */
FUNC(entry_FRED_R3, 4096)
- PUSH_AND_CLEAR_GPRS
+ PUSH_AND_CLEAR_GPRS ssp=0
mov %rsp, %rdi
call entry_from_pv
@@ -38,7 +38,7 @@ LABEL(eretu, 0)
END(eretu_exit_to_guest)
FUNC(eretu_error_dom_crash)
- PUSH_AND_CLEAR_GPRS
+ PUSH_AND_CLEAR_GPRS ssp=0
sti
call asm_domain_crash_synchronous /* Does not return */
END(eretu_error_dom_crash)
On 08/04/2026 1:22 pm, Jan Beulich wrote: > We will want to use that value for call trace generation, and likely > also to eliminate the somewhat fragile shadow stack searching done in > fixup_exception_return(). For those purposes, guest-only entry points do > not need to record that value. > > To keep the saving code simple, record our own SSP that corresponds to > an exception frame, pointing to the top of the shadow stack counterpart > of what the CPU has saved on the regular stack. Consuming code can then > work its way from there. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to > the error code isn't entirely nice; putting it ahead of %r15 would entail > other changes, though. An option may be to not make SSP handling part of > the macros in the first place. Thoughts? I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial complexity/inefficiency, and mixing of unrelated tasks. I have several series trying to purge them. I suppose I really ought to try and finish this off properly. While classing SSP as a "register" is probably fine, the ssp= parameter (and particular it's asymmetric nature) is on the wrong side of the "complex" argument IMO. > For POP_GPRS, does it really matter that it doesn't alter EFLAGS? Yes. The SYSCALL fix for one (reviewed, but waiting on final testing before I commit). Then the VT-x code when swapped to use POP_GPRS. To take a step back, you say that putting it ahead of %r15 would entail other changes. What changes? The resulting asm would be far cleaner. It would be an rdssp;push on one side, and a pop into any register on the other side. Furthermore, given that the ssp= doesn't exclude storing it for some user frames, just store it for all. It's one push/pop into a hot cacheline, and makes a substantial reduction in complexity. ~Andrew
On 08.04.2026 18:58, Andrew Cooper wrote: > On 08/04/2026 1:22 pm, Jan Beulich wrote: >> We will want to use that value for call trace generation, and likely >> also to eliminate the somewhat fragile shadow stack searching done in >> fixup_exception_return(). For those purposes, guest-only entry points do >> not need to record that value. >> >> To keep the saving code simple, record our own SSP that corresponds to >> an exception frame, pointing to the top of the shadow stack counterpart >> of what the CPU has saved on the regular stack. Consuming code can then >> work its way from there. >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> --- >> For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to >> the error code isn't entirely nice; putting it ahead of %r15 would entail >> other changes, though. An option may be to not make SSP handling part of >> the macros in the first place. Thoughts? > > I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial > complexity/inefficiency, and mixing of unrelated tasks. > > I have several series trying to purge them. I suppose I really ought to > try and finish this off properly. > > While classing SSP as a "register" is probably fine, the ssp= parameter > (and particular it's asymmetric nature) is on the wrong side of the > "complex" argument IMO. > >> For POP_GPRS, does it really matter that it doesn't alter EFLAGS? > > Yes. The SYSCALL fix for one (reviewed, but waiting on final testing > before I commit). > > Then the VT-x code when swapped to use POP_GPRS. > > > To take a step back, you say that putting it ahead of %r15 would entail > other changes. What changes? SAVE_ALL's initial ADD, RESTORE_ALL's final SUB, and then the hunt for anything which may simply assume UREGS_r15 to be 0. If UREGS_r<xyz> were ordered by register number, I would have considered putting it where %rsp nominally would go, but without that putting it somewhere in the middle feels rather arbitrary. 
> The resulting asm would be far cleaner. I agree. > It would be an rdssp;push on > one side, and a pop into any register on the other side. Furthermore, > given that the ssp= doesn't exclude storing it for some user frames, > just store it for all. It's one push/pop into a hot cacheline, and > makes a substantial reduction in complexity. I'm having significant reservations against that. I use the 0 put there in subsequent patches, to identify absence of that data being available. Jan
On 09/04/2026 9:13 am, Jan Beulich wrote: > On 08.04.2026 18:58, Andrew Cooper wrote: >> On 08/04/2026 1:22 pm, Jan Beulich wrote: >>> We will want to use that value for call trace generation, and likely >>> also to eliminate the somewhat fragile shadow stack searching done in >>> fixup_exception_return(). For those purposes, guest-only entry points do >>> not need to record that value. >>> >>> To keep the saving code simple, record our own SSP that corresponds to >>> an exception frame, pointing to the top of the shadow stack counterpart >>> of what the CPU has saved on the regular stack. Consuming code can then >>> work its way from there. >>> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com> >>> --- >>> For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to >>> the error code isn't entirely nice; putting it ahead of %r15 would entail >>> other changes, though. An option may be to not make SSP handling part of >>> the macros in the first place. Thoughts? >> I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial >> complexity/inefficiency, and mixing of unrelated tasks. >> >> I have several series trying to purge them. I suppose I really ought to >> try and finish this off properly. >> >> While classing SSP as a "register" is probably fine, the ssp= parameter >> (and particular it's asymmetric nature) is on the wrong side of the >> "complex" argument IMO. >> >>> For POP_GPRS, does it really matter that it doesn't alter EFLAGS? >> Yes. The SYSCALL fix for one (reviewed, but waiting on final testing >> before I commit). >> >> Then the VT-x code when swapped to use POP_GPRS. >> >> >> To take a step back, you say that putting it ahead of %r15 would entail >> other changes. What changes? > SAVE_ALL's initial ADD, RESTORE_ALL's final SUB, Ok, this problem goes away if they're purged. I guess I should refresh and repost my series. > and then the hunt for > anything which may simply assume UREGS_r15 to be 0. 
I highly doubt we've got anything like this. (And even if we do, it wants fixing, not using as an argument against doing this the nicer way.) > If UREGS_r<xyz> were > ordered by register number, I would have considered putting it where > %rsp nominally would go, but without that putting it somewhere in the > middle feels rather arbitrary. > >> The resulting asm would be far cleaner. > I agree. > >> It would be an rdssp;push on >> one side, and a pop into any register on the other side. Furthermore, >> given that the ssp= doesn't exclude storing it for some user frames, >> just store it for all. It's one push/pop into a hot cacheline, and >> makes a substantial reduction in complexity. > I'm having significant reservations against that. I use the 0 put there > in subsequent patches, to identify absence of that data being available. Well, that's not safe then. You've already got non-zero values there on entry-from-PV because there's no CPL check gating RDSPP the common IDT paths. ~Andrew
On 09.04.2026 13:22, Andrew Cooper wrote: > On 09/04/2026 9:13 am, Jan Beulich wrote: >> On 08.04.2026 18:58, Andrew Cooper wrote: >>> It would be an rdssp;push on >>> one side, and a pop into any register on the other side. Furthermore, >>> given that the ssp= doesn't exclude storing it for some user frames, >>> just store it for all. It's one push/pop into a hot cacheline, and >>> makes a substantial reduction in complexity. >> I'm having significant reservations against that. I use the 0 put there >> in subsequent patches, to identify absence of that data being available. > > Well, that's not safe then. > > You've already got non-zero values there on entry-from-PV because > there's no CPL check gating RDSPP the common IDT paths. In that case we get safe values (as read from the MSR during the privilege level switch). And wait - no, the latter two patches don't make any such assumptions, I don't think. I may have mis-remembered, or I may have remembered how things were at a very early stage. So really the concern is with RDSSPQ's resource use. This could in principle be as cheap as MOV, but how do I know? (The latency/throughput data I can find doesn't include this insn.) Plus for the purely PV entrypoints I don't see why we would want/need the slightly larger code size that would result. Jan
© 2016 - 2026 Red Hat, Inc.