xen/arch/x86/cpu/common.c | 25 ++++++++++++++++++---- xen/arch/x86/include/asm/shstk.h | 46 ++++++++++++++++++++++++++++++++++++++++ xen/arch/x86/traps.c | 8 +++---- 3 files changed, 70 insertions(+), 9 deletions(-) create mode 100644 xen/arch/x86/include/asm/shstk.h
Stacks are not freed across S3. Execution just stops, leaving supervisor
token busy bits active. Fixing this for the primary shadow stack was done
previously, but there is a (rare) risk that an IST token is left busy too.
This will manifest as #DF next time the IST vector gets used.
Introduce rdssp() and wrss() helpers in a new shstk.h, cleaning up
fixup_exception_return() and explaining the trick with the literal 1.
Then this infrastructure to rewrite the IST tokens in load_system_tables()
when all the other IST details are being set up. In the case that an IST
token were left busy across S3, this will clear the busy bit before the stack
gets used.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
---
xen/arch/x86/cpu/common.c | 25 ++++++++++++++++++----
xen/arch/x86/include/asm/shstk.h | 46 ++++++++++++++++++++++++++++++++++++++++
xen/arch/x86/traps.c | 8 +++----
3 files changed, 70 insertions(+), 9 deletions(-)
create mode 100644 xen/arch/x86/include/asm/shstk.h
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index bd2207163a35..6eab7dbe894c 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -13,6 +13,7 @@
#include <asm/apic.h>
#include <asm/random.h>
#include <asm/setup.h>
+#include <asm/shstk.h>
#include <mach_apic.h>
#include <public/sysctl.h> /* for XEN_INVALID_{SOCKET,CORE}_ID */
@@ -826,15 +827,31 @@ void load_system_tables(void)
*/
if (cpu_has_xen_shstk) {
volatile uint64_t *ist_ssp = tss_page->ist_ssp;
+ unsigned long
+ mce_ssp = stack_top + (IST_MCE * IST_SHSTK_SIZE) - 8,
+ nmi_ssp = stack_top + (IST_NMI * IST_SHSTK_SIZE) - 8,
+ db_ssp = stack_top + (IST_DB * IST_SHSTK_SIZE) - 8,
+ df_ssp = stack_top + (IST_DF * IST_SHSTK_SIZE) - 8;
ist_ssp[0] = 0x8600111111111111ul;
- ist_ssp[IST_MCE] = stack_top + (IST_MCE * IST_SHSTK_SIZE) - 8;
- ist_ssp[IST_NMI] = stack_top + (IST_NMI * IST_SHSTK_SIZE) - 8;
- ist_ssp[IST_DB] = stack_top + (IST_DB * IST_SHSTK_SIZE) - 8;
- ist_ssp[IST_DF] = stack_top + (IST_DF * IST_SHSTK_SIZE) - 8;
+ ist_ssp[IST_MCE] = mce_ssp;
+ ist_ssp[IST_NMI] = nmi_ssp;
+ ist_ssp[IST_DB] = db_ssp;
+ ist_ssp[IST_DF] = df_ssp;
for ( i = IST_DF + 1; i < ARRAY_SIZE(tss_page->ist_ssp); ++i )
ist_ssp[i] = 0x8600111111111111ul;
+ if (rdssp() != SSP_NO_SHSTK) {
+ /*
+ * Rewrite supervisor tokens when shadow stacks are
+ * active. This resets any busy bits left across S3.
+ */
+ wrss(mce_ssp, _p(mce_ssp));
+ wrss(nmi_ssp, _p(nmi_ssp));
+ wrss(db_ssp, _p(db_ssp));
+ wrss(df_ssp, _p(df_ssp));
+ }
+
wrmsrl(MSR_INTERRUPT_SSP_TABLE, (unsigned long)ist_ssp);
}
diff --git a/xen/arch/x86/include/asm/shstk.h b/xen/arch/x86/include/asm/shstk.h
new file mode 100644
index 000000000000..a26e3c70f0c6
--- /dev/null
+++ b/xen/arch/x86/include/asm/shstk.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2022 Citrix Systems Ltd.
+ */
+#ifndef XEN_ASM_SHSTK_H
+#define XEN_ASM_SHSTK_H
+
+/*
+ * RDSSP is a nop when shadow stacks are active. Also, SSP has a minimum
+ * alignment of 4 which enforced by hardware.
+ *
+ * We load 1 into a register, then RDSSP. If shadow stacks are not active,
+ * RDSSP is a nop, and the 1 is preserved. If shadow stacks are active, the 1
+ * is clobbered with the real SSP, which has the bottom two bits clear.
+ */
+#define SSP_NO_SHSTK 1
+
+static inline unsigned long rdssp(void)
+{
+ unsigned long ssp;
+
+ asm volatile ( "rdsspq %0" : "=r" (ssp) : "0" (SSP_NO_SHSTK) );
+
+ return ssp;
+}
+
+static inline void wrss(unsigned long val, unsigned long *ptr)
+{
+ asm ( "wrssq %[val], %[ptr]"
+ : [ptr] "=m" (*ptr)
+ : [val] "r" (val) );
+}
+
+#endif /* XEN_ASM_SHSTK_H */
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index a2278d9499d0..86595479707a 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -84,6 +84,7 @@
#include <asm/pv/traps.h>
#include <asm/pv/trace.h>
#include <asm/pv/mm.h>
+#include <asm/shstk.h>
/*
* opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
@@ -868,8 +869,7 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
{
unsigned long ssp, *ptr, *base;
- asm ( "rdsspq %0" : "=r" (ssp) : "0" (1) );
- if ( ssp == 1 )
+ if ( (ssp = rdssp()) == SSP_NO_SHSTK )
goto shstk_done;
ptr = _p(ssp);
@@ -898,9 +898,7 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
*/
if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
{
- asm ( "wrssq %[fix], %[stk]"
- : [stk] "=m" (ptr[0])
- : [fix] "r" (fixup) );
+ wrss(fixup, ptr);
goto shstk_done;
}
}
--
2.11.0
On 14.03.2022 12:00, Andrew Cooper wrote: > Stacks are not freed across S3. Execution just stops, leaving supervisor > token busy bits active. Fixing this for the primary shadow stack was done > previously, but there is a (rare) risk that an IST token is left busy too. > This will manifest as #DF next time the IST vector gets used. Thinking about it some more - wouldn't it be more natural to turn off CET as CPUs are being brought down (and for the BSP as late as possible before actually invoking S3)? That way no new busy bits can be written anymore. Jan
On 14/03/2022 14:29, Jan Beulich wrote: > On 14.03.2022 12:00, Andrew Cooper wrote: >> Stacks are not freed across S3. Execution just stops, leaving supervisor >> token busy bits active. Fixing this for the primary shadow stack was done >> previously, but there is a (rare) risk that an IST token is left busy too. >> This will manifest as #DF next time the IST vector gets used. > Thinking about it some more - wouldn't it be more natural to turn off > CET as CPUs are being brought down (and for the BSP as late as possible > before actually invoking S3)? That way no new busy bits can be written > anymore. I did consider that, but I don't want to go down that route. It's too easy to let that turn into a bug where we fail to turn shstk back on, particularly as we have cpu parking rather than full offline. It's far safer IMO to ensure that shstk is active for all activities in Xen context. ~Andrew
On 14.03.2022 12:00, Andrew Cooper wrote: > Stacks are not freed across S3. Execution just stops, leaving supervisor > token busy bits active. Fixing this for the primary shadow stack was done > previously, but there is a (rare) risk that an IST token is left busy too. > This will manifest as #DF next time the IST vector gets used. Under what (rare) condition would this happen? The only scenario I could come up with (which wouldn't result in a crash anyway) is the NMI watchdog hitting after a CPU was already taken offline, and the handler not managing to complete before power is cut. I think it would help to mention one such specific case. > --- /dev/null > +++ b/xen/arch/x86/include/asm/shstk.h > @@ -0,0 +1,46 @@ > +/****************************************************************************** > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; If not, see <http://www.gnu.org/licenses/>. > + * > + * Copyright (c) 2022 Citrix Systems Ltd. > + */ > +#ifndef XEN_ASM_SHSTK_H > +#define XEN_ASM_SHSTK_H > + > +/* > + * RDSSP is a nop when shadow stacks are active. I guess there's a "not" missing here, supported by ... > Also, SSP has a minimum > + * alignment of 4 which enforced by hardware. > + * > + * We load 1 into a register, then RDSSP. If shadow stacks are not active, > + * RDSSP is a nop, and the 1 is preserved. ... this. As an alternative I wouldn't mind if you removed the redundancy. Then Reviewed-by: Jan Beulich <jbeulich@suse.com> Jan
On 14/03/2022 14:16, Jan Beulich wrote: > On 14.03.2022 12:00, Andrew Cooper wrote: >> Stacks are not freed across S3. Execution just stops, leaving supervisor >> token busy bits active. Fixing this for the primary shadow stack was done >> previously, but there is a (rare) risk that an IST token is left busy too. >> This will manifest as #DF next time the IST vector gets used. > Under what (rare) condition would this happen? The only scenario I could > come up with (which wouldn't result in a crash anyway) is the NMI watchdog > hitting after a CPU was already taken offline, and the handler not > managing to complete before power is cut. I think it would help to mention > one such specific case. Any NMI, and any #MC. They're the only two IST vectors which are triggered by out-of-core actions. #MC in particular because even LMCE hits both threads. >> --- /dev/null >> +++ b/xen/arch/x86/include/asm/shstk.h >> @@ -0,0 +1,46 @@ >> +/****************************************************************************** >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; If not, see <http://www.gnu.org/licenses/>. >> + * >> + * Copyright (c) 2022 Citrix Systems Ltd. >> + */ >> +#ifndef XEN_ASM_SHSTK_H >> +#define XEN_ASM_SHSTK_H >> + >> +/* >> + * RDSSP is a nop when shadow stacks are active. > I guess there's a "not" missing here, supported by ... > >> Also, SSP has a minimum >> + * alignment of 4 which enforced by hardware. >> + * >> + * We load 1 into a register, then RDSSP. If shadow stacks are not active, >> + * RDSSP is a nop, and the 1 is preserved. > ... this. Yes. > As an alternative I wouldn't mind if you removed the redundancy. > Then > Reviewed-by: Jan Beulich <jbeulich@suse.com> Thanks. I'll see what I can to do tweak the wording, but separating the statements of behaviour from the description of the logic was intentional. ~Andrew
© 2016 - 2024 Red Hat, Inc.