MDS mitigation requires clearing the CPU buffers before returning to
user. This needs to be done late in the exit-to-user path. Current
location of VERW leaves a possibility of kernel data ending up in CPU
buffers for memory accesses done after VERW such as:
1. Kernel data accessed by an NMI between VERW and return-to-user can
remain in CPU buffers since NMI returning to kernel does not
execute VERW to clear CPU buffers.
2. Alyssa reported that after VERW is executed,
CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
call. Memory accesses during stack scrubbing can move kernel stack
contents into CPU buffers.
3. When caller saved registers are restored after a return from
function executing VERW, the kernel stack accesses can remain in
CPU buffers (since they occur after VERW).
To fix this, VERW needs to be moved very late in the exit-to-user path.
In preparation for moving VERW to entry/exit asm code, create macros
that can be used in asm. Also make VERW patching depend on a new feature
flag X86_FEATURE_CLEAR_CPU_BUF.
Reported-by: Alyssa Milburn <alyssa.milburn@intel.com>
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
---
arch/x86/entry/entry.S | 22 ++++++++++++++++++++++
arch/x86/include/asm/cpufeatures.h | 2 +-
arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++
3 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 8c8d38f0cb1d..bd8e77c5a375 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
.pushsection .noinstr.text, "ax"
@@ -20,3 +23,22 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);
.popsection
+
+/*
+ * Defines the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensures VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4af140cf5719..79a7e81b9458 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -308,10 +308,10 @@
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_CLEAR_CPU_BUF (11*32+27) /* "" Clear CPU buffers using VERW */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f93e9b96927a..4ea4c310db52 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -315,6 +315,21 @@
#endif
.endm
+/*
+ * Macros to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers EFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro EXEC_VERW
+ verw _ASM_RIP(mds_verw_sel)
+.endm
+
+.macro CLEAR_CPU_BUFFERS
+ ALTERNATIVE "", __stringify(EXEC_VERW), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
--
2.34.1
On Tue, Jan 23, 2024 at 11:41:01PM -0800, Pawan Gupta wrote: > index 4af140cf5719..79a7e81b9458 100644 > --- a/arch/x86/include/asm/cpufeatures.h > +++ b/arch/x86/include/asm/cpufeatures.h > @@ -308,10 +308,10 @@ > #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ > #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ > #define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ > - > #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ > #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ > #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ > +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+27) /* "" Clear CPU buffers using VERW */ This will need to be rebased. And the "11*32" level is now full in Linus' tree, so this will presumably need to go to a different "level". -- Josh
On Thu, Feb 01, 2024 at 07:29:09PM -0800, Josh Poimboeuf wrote: > On Tue, Jan 23, 2024 at 11:41:01PM -0800, Pawan Gupta wrote: > > index 4af140cf5719..79a7e81b9458 100644 > > --- a/arch/x86/include/asm/cpufeatures.h > > +++ b/arch/x86/include/asm/cpufeatures.h > > @@ -308,10 +308,10 @@ > > #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ > > #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ > > #define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ > > - > > #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ > > #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ > > #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ > > +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+27) /* "" Clear CPU buffers using VERW */ > > This will need to be rebased. And the "11*32" level is now full in > Linus' tree, so this will presumably need to go to a different "level". Yes, will send a new rebased version.
On Tue, Jan 23, 2024 at 11:41:01PM -0800, Pawan Gupta wrote: > MDS mitigation requires clearing the CPU buffers before returning to > user. This needs to be done late in the exit-to-user path. Current > location of VERW leaves a possibility of kernel data ending up in CPU > buffers for memory accesses done after VERW such as: > > 1. Kernel data accessed by an NMI between VERW and return-to-user can > remain in CPU buffers since NMI returning to kernel does not > execute VERW to clear CPU buffers. > 2. Alyssa reported that after VERW is executed, > CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system > call. Memory accesses during stack scrubbing can move kernel stack > contents into CPU buffers. > 3. When caller saved registers are restored after a return from > function executing VERW, the kernel stack accesses can remain in > CPU buffers(since they occur after VERW). > > To fix this VERW needs to be moved very late in exit-to-user path. > > In preparation for moving VERW to entry/exit asm code, create macros > that can be used in asm. Also make VERW patching depend on a new feature > flag X86_FEATURE_CLEAR_CPU_BUF. 
> > Reported-by: Alyssa Milburn <alyssa.milburn@intel.com> > Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> > Suggested-by: Peter Zijlstra <peterz@infradead.org> > Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> > --- > arch/x86/entry/entry.S | 22 ++++++++++++++++++++++ > arch/x86/include/asm/cpufeatures.h | 2 +- > arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++ > 3 files changed, 38 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S > index 8c8d38f0cb1d..bd8e77c5a375 100644 > --- a/arch/x86/entry/entry.S > +++ b/arch/x86/entry/entry.S > @@ -6,6 +6,9 @@ > #include <linux/export.h> > #include <linux/linkage.h> > #include <asm/msr-index.h> > +#include <asm/unwind_hints.h> > +#include <asm/segment.h> > +#include <asm/cache.h> > > .pushsection .noinstr.text, "ax" > > @@ -20,3 +23,22 @@ SYM_FUNC_END(entry_ibpb) > EXPORT_SYMBOL_GPL(entry_ibpb); > > .popsection > + > +/* > + * Defines the VERW operand that is disguised as entry code so that > + * it can be referenced with KPTI enabled. This ensures VERW can be > + * used late in exit-to-user path after page tables are switched. > + */ > +.pushsection .entry.text, "ax" > + > +.align L1_CACHE_BYTES, 0xcc > +SYM_CODE_START_NOALIGN(mds_verw_sel) > + UNWIND_HINT_UNDEFINED > + ANNOTATE_NOENDBR > + .word __KERNEL_DS > +.align L1_CACHE_BYTES, 0xcc > +SYM_CODE_END(mds_verw_sel); > +/* For KVM */ > +EXPORT_SYMBOL_GPL(mds_verw_sel); I realized this needs an extern: diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 0a8fa023a804..9daf92071f77 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -550,6 +550,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include <asm/segment.h> /**
© 2016 - 2025 Red Hat, Inc.