When a system is only affected by MMIO Stale Data, the VERW mitigation is
currently handled differently from other data sampling attacks like
MDS/TAA/RFDS, which do the VERW in asm. This is because for MMIO Stale Data,
VERW is needed only when the guest can access host MMIO, and that condition
was tricky to check in asm.
Refactoring done by:
83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
MMIO into the guest")
now makes it easier to execute VERW conditionally in asm based on
VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
Unify MMIO Stale Data mitigation with the other VERW-based mitigations and
have a single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
call to x86_clear_cpu_buffers() in vmx_vcpu_enter_exit().
This also untangles the L1D Flush and MMIO Stale Data mitigations. Earlier,
an L1D Flush would skip the VERW for MMIO Stale Data; now the two
mitigations are independent of each other. This has little practical
implication, since there are no CPUs that are affected by L1TF and are
*only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS), but
it makes the code cleaner and easier to maintain.
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
---
arch/x86/kvm/vmx/run_flags.h | 12 ++++++------
arch/x86/kvm/vmx/vmenter.S | 5 +++++
arch/x86/kvm/vmx/vmx.c | 26 ++++++++++----------------
3 files changed, 21 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index 2f20fb170def8b10c8c0c46f7ba751f845c19e2c..004fe1ca89f05524bf3986540056de2caf0abbad 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,12 +2,12 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
-#define VMX_RUN_VMRESUME_SHIFT 0
-#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
-#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT 2
+#define VMX_RUN_VMRESUME_SHIFT 0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
+#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
-#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
-#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
-#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
+#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
+#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 0dd23beae207795484150698d1674dc4044cc520..ec91f4267eca319ffa8e6079887e8dfecc7f96d8 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -137,6 +137,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load @regs to RAX. */
mov (%_ASM_SP), %_ASM_AX
+ /* jz .Lskip_clear_cpu_buffers below relies on this */
+ test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
+
/* Check if vmlaunch or vmresume is needed */
bt $VMX_RUN_VMRESUME_SHIFT, %ebx
@@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+ /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
+ jz .Lskip_clear_cpu_buffers
/* Clobbers EFLAGS.ZF */
VM_CLEAR_CPU_BUFFERS
.Lskip_clear_cpu_buffers:
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 451be757b3d1b2fec6b2b79157f26dd43bc368b8..303935882a9f8d1d8f81a499cdce1fdc8dad62f0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -903,9 +903,16 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
flags |= VMX_RUN_SAVE_SPEC_CTRL;
- if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
- kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
- flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
+ /*
+ * When affected by MMIO Stale Data only (and not other data sampling
+ * attacks) only clear for MMIO-capable guests.
+ */
+ if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
+ if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
+ flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
+ } else {
+ flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
+ }
return flags;
}
@@ -7320,21 +7327,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_state_enter_irqoff();
- /*
- * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
- * mitigation for MDS is done late in VMentry and is still
- * executed in spite of L1D Flush. This is because an extra VERW
- * should not matter much after the big hammer L1D Flush.
- *
- * cpu_buf_vm_clear is used when system is not vulnerable to MDS/TAA,
- * and is affected by MMIO Stale Data. In such cases mitigation in only
- * needed against an MMIO capable guest.
- */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&cpu_buf_vm_clear) &&
- (flags & VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO))
- x86_clear_cpu_buffers();
vmx_disable_fb_clear(vmx);
--
2.34.1
On Wed Oct 29, 2025 at 9:26 PM UTC, Pawan Gupta wrote:
> When a system is only affected by MMIO Stale Data, VERW mitigation is
> currently handled differently than other data sampling attacks like
> MDS/TAA/RFDS, that do the VERW in asm. This is because for MMIO Stale Data,
> VERW is needed only when the guest can access host MMIO, this was tricky to
> check in asm.
>
> Refactoring done by:
>
> 83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
> MMIO into the guest")
>
> now makes it easier to execute VERW conditionally in asm based on
> VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
>
> Unify MMIO Stale Data mitigation with other VERW-based mitigations and only
> have single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
> call to x86_clear_cpu_buffer() in vmx_vcpu_enter_exit().
>
> This also untangles L1D Flush and MMIO Stale Data mitigation. Earlier, an
> L1D Flush would skip the VERW for MMIO Stale Data. Now, both the
> mitigations are independent of each other. Although, this has little
> practical implication since there are no CPUs that are affected by L1TF and
> are *only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS).
> But, this makes the code cleaner and easier to maintain.
>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> ---
> arch/x86/kvm/vmx/run_flags.h | 12 ++++++------
> arch/x86/kvm/vmx/vmenter.S | 5 +++++
> arch/x86/kvm/vmx/vmx.c | 26 ++++++++++----------------
> 3 files changed, 21 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> index 2f20fb170def8b10c8c0c46f7ba751f845c19e2c..004fe1ca89f05524bf3986540056de2caf0abbad 100644
> --- a/arch/x86/kvm/vmx/run_flags.h
> +++ b/arch/x86/kvm/vmx/run_flags.h
> @@ -2,12 +2,12 @@
> #ifndef __KVM_X86_VMX_RUN_FLAGS_H
> #define __KVM_X86_VMX_RUN_FLAGS_H
>
> -#define VMX_RUN_VMRESUME_SHIFT 0
> -#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT 2
> +#define VMX_RUN_VMRESUME_SHIFT 0
> +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> +#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
>
> -#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> -#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
> +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> +#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
>
> #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index 0dd23beae207795484150698d1674dc4044cc520..ec91f4267eca319ffa8e6079887e8dfecc7f96d8 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -137,6 +137,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> /* Load @regs to RAX. */
> mov (%_ASM_SP), %_ASM_AX
>
> + /* jz .Lskip_clear_cpu_buffers below relies on this */
> + test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> +
> /* Check if vmlaunch or vmresume is needed */
> bt $VMX_RUN_VMRESUME_SHIFT, %ebx
>
> @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> /* Load guest RAX. This kills the @regs pointer! */
> mov VCPU_RAX(%_ASM_AX), %_ASM_AX
>
> + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> + jz .Lskip_clear_cpu_buffers
Hm, it's a bit weird that we have the "alternative" inside
VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
unconditionally.
If we really want to super-optimise the no-mitigations-needed case,
shouldn't we want to avoid the conditional in the asm if it never
actually leads to a flush?
On the other hand, if we don't mind a couple of extra instructions,
shouldn't we be fine with just having the whole asm code based solely
on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
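For concreteness, an untested sketch of what the C side could look like in
that case, reusing the static key, feature bit and helper that already appear
in this series (the asm would then only ever look at VMX_RUN_CLEAR_CPU_BUFFERS):

	if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
		/* Only MMIO Stale Data: clear just for MMIO-capable guests */
		if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
			flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
	} else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)) {
		/* MDS/TAA/RFDS etc.: clear for every guest */
		flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
	}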
I guess the issue is that in the latter case we'd be back to having
unnecessary inconsistency with AMD code while in the former case... well
that would just be really annoying asm code - am I on the right
wavelength there? So I'm not necessarily asking for changes here, just
probing in case it prompts any interesting insights on your side.
(Also, maybe this test+jz has a similar cost to the nops that the
"alternative" would inject anyway...?)
On Thu, Oct 30, 2025 at 12:52:12PM +0000, Brendan Jackman wrote:
> On Wed Oct 29, 2025 at 9:26 PM UTC, Pawan Gupta wrote:
> > When a system is only affected by MMIO Stale Data, VERW mitigation is
> > currently handled differently than other data sampling attacks like
> > MDS/TAA/RFDS, that do the VERW in asm. This is because for MMIO Stale Data,
> > VERW is needed only when the guest can access host MMIO, this was tricky to
> > check in asm.
> >
> > Refactoring done by:
> >
> > 83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
> > MMIO into the guest")
> >
> > now makes it easier to execute VERW conditionally in asm based on
> > VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
> >
> > Unify MMIO Stale Data mitigation with other VERW-based mitigations and only
> > have single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
> > call to x86_clear_cpu_buffer() in vmx_vcpu_enter_exit().
> >
> > This also untangles L1D Flush and MMIO Stale Data mitigation. Earlier, an
> > L1D Flush would skip the VERW for MMIO Stale Data. Now, both the
> > mitigations are independent of each other. Although, this has little
> > practical implication since there are no CPUs that are affected by L1TF and
> > are *only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS).
> > But, this makes the code cleaner and easier to maintain.
> >
> > Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> > ---
> > arch/x86/kvm/vmx/run_flags.h | 12 ++++++------
> > arch/x86/kvm/vmx/vmenter.S | 5 +++++
> > arch/x86/kvm/vmx/vmx.c | 26 ++++++++++----------------
> > 3 files changed, 21 insertions(+), 22 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> > index 2f20fb170def8b10c8c0c46f7ba751f845c19e2c..004fe1ca89f05524bf3986540056de2caf0abbad 100644
> > --- a/arch/x86/kvm/vmx/run_flags.h
> > +++ b/arch/x86/kvm/vmx/run_flags.h
> > @@ -2,12 +2,12 @@
> > #ifndef __KVM_X86_VMX_RUN_FLAGS_H
> > #define __KVM_X86_VMX_RUN_FLAGS_H
> >
> > -#define VMX_RUN_VMRESUME_SHIFT 0
> > -#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> > -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT 2
> > +#define VMX_RUN_VMRESUME_SHIFT 0
> > +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> > +#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
> >
> > -#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> > -#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> > -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
> > +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> > +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> > +#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
> >
> > #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> > index 0dd23beae207795484150698d1674dc4044cc520..ec91f4267eca319ffa8e6079887e8dfecc7f96d8 100644
> > --- a/arch/x86/kvm/vmx/vmenter.S
> > +++ b/arch/x86/kvm/vmx/vmenter.S
> > @@ -137,6 +137,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > /* Load @regs to RAX. */
> > mov (%_ASM_SP), %_ASM_AX
> >
> > + /* jz .Lskip_clear_cpu_buffers below relies on this */
> > + test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> > +
> > /* Check if vmlaunch or vmresume is needed */
> > bt $VMX_RUN_VMRESUME_SHIFT, %ebx
> >
> > @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > /* Load guest RAX. This kills the @regs pointer! */
> > mov VCPU_RAX(%_ASM_AX), %_ASM_AX
> >
> > + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > + jz .Lskip_clear_cpu_buffers
>
> Hm, it's a bit weird that we have the "alternative" inside
> VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
> unconditionally.
Exactly, but it is tricky to handle the below 2 cases in asm:
1. MDS -> Always do VM_CLEAR_CPU_BUFFERS
2. MMIO -> Do VM_CLEAR_CPU_BUFFERS only if guest can access host MMIO
In the MMIO case, one guest may have access to host MMIO while another may
not. Alternatives alone can't handle this case as they patch code at boot,
which is then set in stone. One way is to move the conditional inside
VM_CLEAR_CPU_BUFFERS and have it take the run flag as an argument.
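A very rough, untested sketch of that last idea (the macro name and label are
invented, and it only works if it is expanded while the register holding the
run flags is still live):

	.macro VM_CLEAR_CPU_BUFFERS_IF flags_reg:req
		/* Skip the VERW when the run flag is not set for this vCPU */
		test	$VMX_RUN_CLEAR_CPU_BUFFERS, \flags_reg
		jz	.Lskip_verw_\@
		VM_CLEAR_CPU_BUFFERS
	.Lskip_verw_\@:
	.endm

It would be invoked as e.g. VM_CLEAR_CPU_BUFFERS_IF %ebx, but only before the
guest GPRs clobber %ebx, which is exactly the constraint that forces the early
test plus the EFLAGS.ZF trick in the current layout.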
> If we really want to super-optimise the no-mitigations-needed case,
> shouldn't we want to avoid the conditional in the asm if it never
> actually leads to a flush?
Ya, so effectively have the VM_CLEAR_CPU_BUFFERS alternative spit out a
conditional VERW when affected by MMIO Stale Data only, and an unconditional
VERW otherwise.
> On the other hand, if we don't mind a couple of extra instructions,
> shouldn't we be fine with just having the whole asm code based solely
> on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
> X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
That's also an option.
> I guess the issue is that in the latter case we'd be back to having
> unnecessary inconsistency with AMD code while in the former case... well
> that would just be really annoying asm code - am I on the right
> wavelength there? So I'm not necessarily asking for changes here, just
> probing in case it prompts any interesting insights on your side.
>
> (Also, maybe this test+jz has a similar cost to the nops that the
> "alternative" would inject anyway...?)
Likely yes. The test+jz is a necessary evil, needed for the per-guest
handling of MMIO Stale Data.
On Thu, Oct 30, 2025, Pawan Gupta wrote:
> On Thu, Oct 30, 2025 at 12:52:12PM +0000, Brendan Jackman wrote:
> > On Wed Oct 29, 2025 at 9:26 PM UTC, Pawan Gupta wrote:
> > > + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > > + jz .Lskip_clear_cpu_buffers
> >
> > Hm, it's a bit weird that we have the "alternative" inside
> > VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
> > unconditionally.
>
> Exactly, but it is tricky to handle the below 2 cases in asm:
>
> 1. MDS -> Always do VM_CLEAR_CPU_BUFFERS
>
> 2. MMIO -> Do VM_CLEAR_CPU_BUFFERS only if guest can access host MMIO
Overloading VM_CLEAR_CPU_BUFFERS for MMIO is all kinds of confusing, e.g. my
pseudo-patch messed things up. It's impossible to follow what's actually
happening.
> In th MMIO case, one guest may have access to host MMIO while another may
> not. Alternatives alone can't handle this case as they patch code at boot
> which is then set in stone. One way is to move the conditional inside
> VM_CLEAR_CPU_BUFFERS that gets a flag as an arguement.
>
> > If we really want to super-optimise the no-mitigations-needed case,
> > shouldn't we want to avoid the conditional in the asm if it never
> > actually leads to a flush?
>
> Ya, so effectively, have VM_CLEAR_CPU_BUFFERS alternative spit out
> conditional VERW when affected by MMIO_only, otherwise an unconditional
> VERW.
>
> > On the other hand, if we don't mind a couple of extra instructions,
> > shouldn't we be fine with just having the whole asm code based solely
> > on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
> > X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
>
> That's also an option.
>
> > I guess the issue is that in the latter case we'd be back to having
> > unnecessary inconsistency with AMD code while in the former case... well
> > that would just be really annoying asm code - am I on the right
> > wavelength there? So I'm not necessarily asking for changes here, just
> > probing in case it prompts any interesting insights on your side.
> >
> > (Also, maybe this test+jz has a similar cost to the nops that the
> > "alternative" would inject anyway...?)
>
> Likely yes. test+jz is a necessary evil that is needed for MMIO Stale Data
> for different per-guest handling.
I don't like any of those options :-)
I again vote to add X86_FEATURE_CLEAR_CPU_BUF_MMIO, and then have it be mutually
exclusive with X86_FEATURE_CLEAR_CPU_BUF_VM, i.e. be an alternative, not a subset.
Because as proposed, the MMIO case _isn't_ a strict subset, it's a subset iff
the MMIO mitigation is enabled, otherwise it's something else entirely.
After half an hour of debugging godawful assembler errors because I used stringify()
instead of __stringify(), I was able to get to this, which IMO is readable and
intuitive.
/* Clobbers EFLAGS.ZF */
ALTERNATIVE_2 "", \
__CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF_VM, \
__stringify(jz .Lskip_clear_cpu_buffers; \
CLEAR_CPU_BUFFERS_SEQ; \
.Lskip_clear_cpu_buffers:), \
X86_FEATURE_CLEAR_CPU_BUF_MMIO
Without overloading X86_FEATURE_CLEAR_CPU_BUF_VM, the conversion from a
static branch to X86_FEATURE_CLEAR_CPU_BUF_MMIO is purely mechanical and yields:
if (verw_clear_cpu_buf_mitigation_selected) {
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
} else {
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_MMIO);
}
Give me a few hours to test, and I'll post a v2. The patches are:
Pawan Gupta (1):
x86/bugs: Use VM_CLEAR_CPU_BUFFERS in VMX as well
Sean Christopherson (4):
x86/bugs: Decouple ALTERNATIVE usage from VERW macro definition
x86/bugs: Use an X86_FEATURE_xxx flag for the MMIO Stale Data mitigation
KVM: VMX: Handle MMIO Stale Data in VM-Enter assembly via ALTERNATIVES_2
x86/bugs: KVM: Move VM_CLEAR_CPU_BUFFERS into SVM as SVM_CLEAR_CPU_BUFFERS
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/nospec-branch.h | 24 +++++++++---------------
arch/x86/kernel/cpu/bugs.c | 18 +++++++-----------
arch/x86/kvm/mmu/spte.c | 2 +-
arch/x86/kvm/svm/vmenter.S | 6 ++++--
arch/x86/kvm/vmx/vmenter.S | 13 ++++++++++++-
arch/x86/kvm/vmx/vmx.c | 15 +--------------
7 files changed, 35 insertions(+), 44 deletions(-)
On Thu, Oct 30, 2025 at 11:21:52AM -0700, Sean Christopherson wrote:
> On Thu, Oct 30, 2025, Pawan Gupta wrote:
> > On Thu, Oct 30, 2025 at 12:52:12PM +0000, Brendan Jackman wrote:
> > > On Wed Oct 29, 2025 at 9:26 PM UTC, Pawan Gupta wrote:
> > > > + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > > > + jz .Lskip_clear_cpu_buffers
> > >
> > > Hm, it's a bit weird that we have the "alternative" inside
> > > VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
> > > unconditionally.
> >
> > Exactly, but it is tricky to handle the below 2 cases in asm:
> >
> > 1. MDS -> Always do VM_CLEAR_CPU_BUFFERS
> >
> > 2. MMIO -> Do VM_CLEAR_CPU_BUFFERS only if guest can access host MMIO
>
> Overloading VM_CLEAR_CPU_BUFFERS for MMIO is all kinds of confusing, e.g. my
> pseudo-patch messed things. It's impossible to understand
Agree.
> > In th MMIO case, one guest may have access to host MMIO while another may
> > not. Alternatives alone can't handle this case as they patch code at boot
> > which is then set in stone. One way is to move the conditional inside
> > VM_CLEAR_CPU_BUFFERS that gets a flag as an arguement.
> >
> > > If we really want to super-optimise the no-mitigations-needed case,
> > > shouldn't we want to avoid the conditional in the asm if it never
> > > actually leads to a flush?
> >
> > Ya, so effectively, have VM_CLEAR_CPU_BUFFERS alternative spit out
> > conditional VERW when affected by MMIO_only, otherwise an unconditional
> > VERW.
> >
> > > On the other hand, if we don't mind a couple of extra instructions,
> > > shouldn't we be fine with just having the whole asm code based solely
> > > on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
> > > X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
> >
> > That's also an option.
> >
> > > I guess the issue is that in the latter case we'd be back to having
> > > unnecessary inconsistency with AMD code while in the former case... well
> > > that would just be really annoying asm code - am I on the right
> > > wavelength there? So I'm not necessarily asking for changes here, just
> > > probing in case it prompts any interesting insights on your side.
> > >
> > > (Also, maybe this test+jz has a similar cost to the nops that the
> > > "alternative" would inject anyway...?)
> >
> > Likely yes. test+jz is a necessary evil that is needed for MMIO Stale Data
> > for different per-guest handling.
>
> I don't like any of those options :-)
>
> I again vote to add X86_FEATURE_CLEAR_CPU_BUF_MMIO, and then have it be mutually
> exlusive with X86_FEATURE_CLEAR_CPU_BUF_VM, i.e. be an alterantive, not a subset.
> Because as proposed, the MMIO case _isn't_ a strict subset, it's a subset iff
> the MMIO mitigation is enabled, otherwise it's something else entirely.
I don't see a problem with that.
> After half an hour of debugging godawful assembler errors because I used stringify()
> instead of __stringify(),
Not surprised at all :-)
> I was able to get to this, which IMO is readable and intuitive.
>
> /* Clobbers EFLAGS.ZF */
> ALTERNATIVE_2 "", \
> __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF_VM, \
> __stringify(jz .Lskip_clear_cpu_buffers; \
> CLEAR_CPU_BUFFERS_SEQ; \
Curious what this is doing, I will wait for your patches.
> .Lskip_clear_cpu_buffers:), \
> X86_FEATURE_CLEAR_CPU_BUF_MMIO
>
> Without overloading X86_FEATURE_CLEAR_CPU_BUF_VM, e.g. the conversion from a
> static branch to X86_FEATURE_CLEAR_CPU_BUF_MMIO is a pure conversion and yields:
>
> if (verw_clear_cpu_buf_mitigation_selected) {
> setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
> setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
> } else {
> setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_MMIO);
> }
>
> Give me a few hours to test, and I'll post a v2. The patches are:
>
> Pawan Gupta (1):
> x86/bugs: Use VM_CLEAR_CPU_BUFFERS in VMX as well
>
> Sean Christopherson (4):
> x86/bugs: Decouple ALTERNATIVE usage from VERW macro definition
> x86/bugs: Use an X86_FEATURE_xxx flag for the MMIO Stale Data mitigation
> KVM: VMX: Handle MMIO Stale Data in VM-Enter assembly via ALTERNATIVES_2
> x86/bugs: KVM: Move VM_CLEAR_CPU_BUFFERS into SVM as SVM_CLEAR_CPU_BUFFERS
Ok, sounds good to me.
On Thu, Oct 30, 2025, Brendan Jackman wrote:
> > @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > /* Load guest RAX. This kills the @regs pointer! */
> > mov VCPU_RAX(%_ASM_AX), %_ASM_AX
> >
> > + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > + jz .Lskip_clear_cpu_buffers
>
> Hm, it's a bit weird that we have the "alternative" inside
> VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
> unconditionally.
Yeah, I had the same reaction, but couldn't come up with a clean-ish solution
and so ignored it :-)
> If we really want to super-optimise the no-mitigations-needed case,
> shouldn't we want to avoid the conditional in the asm if it never
> actually leads to a flush?
>
> On the other hand, if we don't mind a couple of extra instructions,
> shouldn't we be fine with just having the whole asm code based solely
> on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
> X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
>
> I guess the issue is that in the latter case we'd be back to having
> unnecessary inconsistency with AMD code while in the former case... well
> that would just be really annoying asm code - am I on the right
> wavelength there? So I'm not necessarily asking for changes here, just
> probing in case it prompts any interesting insights on your side.
>
> (Also, maybe this test+jz has a similar cost to the nops that the
> "alternative" would inject anyway...?)
It's not at all expensive. My bigger objection is that it's hard to follow what's
happening.
Aha! Idea. IIUC, only the MMIO Stale Data is conditional based on the properties
of the vCPU, so we should track _that_ in a KVM_RUN flag. And then if we add yet
another X86_FEATURE for MMIO Stale Data flushing (instead of a static branch),
this path can use ALTERNATIVE_2. The use of ALTERNATIVE_2 isn't exactly pretty,
but IMO this is much more intuitive.
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index 004fe1ca89f0..b9651960e069 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -4,10 +4,10 @@
#define VMX_RUN_VMRESUME_SHIFT 0
#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
-#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
+#define VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT 2
#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
-#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
+#define VMX_RUN_CAN_ACCESS_HOST_MMIO BIT(VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index ec91f4267eca..50a748b489b4 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -137,8 +137,10 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load @regs to RAX. */
mov (%_ASM_SP), %_ASM_AX
- /* jz .Lskip_clear_cpu_buffers below relies on this */
- test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
+ /* Check if jz .Lskip_clear_cpu_buffers below relies on this */
+ ALTERNATIVE_2 "",
+ "", X86_FEATURE_CLEAR_CPU_BUF
+ "test $VMX_RUN_CAN_ACCESS_HOST_MMIO, %ebx", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
/* Check if vmlaunch or vmresume is needed */
bt $VMX_RUN_VMRESUME_SHIFT, %ebx
@@ -163,8 +165,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
- jz .Lskip_clear_cpu_buffers
+ ALTERNATIVE_2 "jmp .Lskip_clear_cpu_buffers",
+ "", X86_FEATURE_CLEAR_CPU_BUF
+ "jz .Lskip_clear_cpu_buffers", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
/* Clobbers EFLAGS.ZF */
VM_CLEAR_CPU_BUFFERS
.Lskip_clear_cpu_buffers:
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 303935882a9f..b9e7247e6b9a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -903,16 +903,9 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
flags |= VMX_RUN_SAVE_SPEC_CTRL;
- /*
- * When affected by MMIO Stale Data only (and not other data sampling
- * attacks) only clear for MMIO-capable guests.
- */
- if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
- if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
- flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
- } else {
- flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
- }
+ if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO) &&
+ kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
+ flags |= VMX_RUN_CAN_ACCESS_HOST_MMIO;
return flags;
}
On Thu, Oct 30, 2025 at 09:06:41AM -0700, Sean Christopherson wrote:
> On Thu, Oct 30, 2025, Brendan Jackman wrote:
> > > @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > > /* Load guest RAX. This kills the @regs pointer! */
> > > mov VCPU_RAX(%_ASM_AX), %_ASM_AX
> > >
> > > + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > > + jz .Lskip_clear_cpu_buffers
> >
> > Hm, it's a bit weird that we have the "alternative" inside
> > VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
> > unconditionally.
>
> Yeah, I had the same reaction, but couldn't come up with a clean-ish solution
> and so ignored it :-)
Ya, it is tricky to handle per-guest mitigation for MMIO in a clean way.
> > If we really want to super-optimise the no-mitigations-needed case,
> > shouldn't we want to avoid the conditional in the asm if it never
> > actually leads to a flush?
> >
> > On the other hand, if we don't mind a couple of extra instructions,
> > shouldn't we be fine with just having the whole asm code based solely
> > on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
> > X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
> >
> > I guess the issue is that in the latter case we'd be back to having
> > unnecessary inconsistency with AMD code while in the former case... well
> > that would just be really annoying asm code - am I on the right
> > wavelength there? So I'm not necessarily asking for changes here, just
> > probing in case it prompts any interesting insights on your side.
> >
> > (Also, maybe this test+jz has a similar cost to the nops that the
> > "alternative" would inject anyway...?)
>
> It's not at all expensive. My bigger objection is that it's hard to follow what's
> happening.
>
> Aha! Idea. IIUC, only the MMIO Stale Data is conditional based on the properties
> of the vCPU, so we should track _that_ in a KVM_RUN flag. And then if we add yet
> another X86_FEATURE for MMIO Stale Data flushing (instead of a static branch),
> this path can use ALTERNATIVE_2. The use of ALTERNATIVE_2 isn't exactly pretty,
> but IMO this is much more intuitive.
>
> diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> index 004fe1ca89f0..b9651960e069 100644
> --- a/arch/x86/kvm/vmx/run_flags.h
> +++ b/arch/x86/kvm/vmx/run_flags.h
> @@ -4,10 +4,10 @@
>
> #define VMX_RUN_VMRESUME_SHIFT 0
> #define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> -#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
> +#define VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT 2
>
> #define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> #define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> -#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
> +#define VMX_RUN_CAN_ACCESS_HOST_MMIO BIT(VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT)
>
> #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index ec91f4267eca..50a748b489b4 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -137,8 +137,10 @@ SYM_FUNC_START(__vmx_vcpu_run)
> /* Load @regs to RAX. */
> mov (%_ASM_SP), %_ASM_AX
>
> - /* jz .Lskip_clear_cpu_buffers below relies on this */
> - test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> + /* Check if jz .Lskip_clear_cpu_buffers below relies on this */
> + ALTERNATIVE_2 "",
> + "", X86_FEATURE_CLEAR_CPU_BUF
> + "test $VMX_RUN_CAN_ACCESS_HOST_MMIO, %ebx", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
This approach looks better. I think we will be fine without ALTERNATIVE_2:
ALTERNATIVE "", "test $VMX_RUN_CAN_ACCESS_HOST_MMIO, %ebx", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
> /* Check if vmlaunch or vmresume is needed */
> bt $VMX_RUN_VMRESUME_SHIFT, %ebx
> @@ -163,8 +165,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> /* Load guest RAX. This kills the @regs pointer! */
> mov VCPU_RAX(%_ASM_AX), %_ASM_AX
>
> - /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> - jz .Lskip_clear_cpu_buffers
> + ALTERNATIVE_2 "jmp .Lskip_clear_cpu_buffers",
> + "", X86_FEATURE_CLEAR_CPU_BUF
> + "jz .Lskip_clear_cpu_buffers", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
I am not 100% sure, but I believe the _MMIO check needs to be before
X86_FEATURE_CLEAR_CPU_BUF_VM, because MMIO mitigation also sets _VM:
ALTERNATIVE_2 "jmp .Lskip_clear_cpu_buffers",
"jz .Lskip_clear_cpu_buffers", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
"", X86_FEATURE_CLEAR_CPU_BUF_VM
> /* Clobbers EFLAGS.ZF */
> VM_CLEAR_CPU_BUFFERS
> .Lskip_clear_cpu_buffers:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 303935882a9f..b9e7247e6b9a 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -903,16 +903,9 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
> flags |= VMX_RUN_SAVE_SPEC_CTRL;
>
> - /*
> - * When affected by MMIO Stale Data only (and not other data sampling
> - * attacks) only clear for MMIO-capable guests.
> - */
> - if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> - if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> - flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> - } else {
> - flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> - }
> + if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO) &&
> + kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> + flags |= VMX_RUN_CAN_ACCESS_HOST_MMIO;
Thanks Sean! This is much cleaner.
On Thu Oct 30, 2025 at 4:06 PM UTC, Sean Christopherson wrote:
> On Thu, Oct 30, 2025, Brendan Jackman wrote:
>> > @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
>> > 	/* Load guest RAX. This kills the @regs pointer! */
>> > 	mov VCPU_RAX(%_ASM_AX), %_ASM_AX
>> >
>> > +	/* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
>> > +	jz .Lskip_clear_cpu_buffers
>>
>> Hm, it's a bit weird that we have the "alternative" inside
>> VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
>> unconditionally.
>
> Yeah, I had the same reaction, but couldn't come up with a clean-ish solution
> and so ignored it :-)
>
>> If we really want to super-optimise the no-mitigations-needed case,
>> shouldn't we want to avoid the conditional in the asm if it never
>> actually leads to a flush?
>>
>> On the other hand, if we don't mind a couple of extra instructions,
>> shouldn't we be fine with just having the whole asm code based solely
>> on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
>> X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
>>
>> I guess the issue is that in the latter case we'd be back to having
>> unnecessary inconsistency with AMD code while in the former case... well
>> that would just be really annoying asm code - am I on the right
>> wavelength there? So I'm not necessarily asking for changes here, just
>> probing in case it prompts any interesting insights on your side.
>>
>> (Also, maybe this test+jz has a similar cost to the nops that the
>> "alternative" would inject anyway...?)
>
> It's not at all expensive. My bigger objection is that it's hard to follow what's
> happening.
>
> Aha! Idea. IIUC, only the MMIO Stale Data is conditional based on the properties
> of the vCPU, so we should track _that_ in a KVM_RUN flag. And then if we add yet
> another X86_FEATURE for MMIO Stale Data flushing (instead of a static branch),
> this path can use ALTERNATIVE_2. The use of ALTERNATIVE_2 isn't exactly pretty,
> but IMO this is much more intuitive.
>
> diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> index 004fe1ca89f0..b9651960e069 100644
> --- a/arch/x86/kvm/vmx/run_flags.h
> +++ b/arch/x86/kvm/vmx/run_flags.h
> @@ -4,10 +4,10 @@
>
> #define VMX_RUN_VMRESUME_SHIFT 0
> #define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> -#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
> +#define VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT 2
>
> #define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> #define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> -#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
> +#define VMX_RUN_CAN_ACCESS_HOST_MMIO BIT(VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT)
>
> #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index ec91f4267eca..50a748b489b4 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -137,8 +137,10 @@ SYM_FUNC_START(__vmx_vcpu_run)
> 	/* Load @regs to RAX. */
> 	mov (%_ASM_SP), %_ASM_AX
>
> -	/* jz .Lskip_clear_cpu_buffers below relies on this */
> -	test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> +	/* Check if jz .Lskip_clear_cpu_buffers below relies on this */
> +	ALTERNATIVE_2 "",
> +	"", X86_FEATURE_CLEAR_CPU_BUF
> +	"test $VMX_RUN_CAN_ACCESS_HOST_MMIO, %ebx", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO

Er, I don't understand the ALTERNATIVE_2 here, don't we just need this?

	ALTERNATIVE "" "test $VMX_RUN_CAN_ACCESS_HOST_MMIO, %ebx",
		    X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO

> 	/* Check if vmlaunch or vmresume is needed */
> 	bt $VMX_RUN_VMRESUME_SHIFT, %ebx
>
> @@ -163,8 +165,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> 	/* Load guest RAX. This kills the @regs pointer! */
> 	mov VCPU_RAX(%_ASM_AX), %_ASM_AX
>
> -	/* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> -	jz .Lskip_clear_cpu_buffers
> +	ALTERNATIVE_2 "jmp .Lskip_clear_cpu_buffers",
> +	"", X86_FEATURE_CLEAR_CPU_BUF
> +	"jz .Lskip_clear_cpu_buffers", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO

To fit with the rest of Pawan's code this would need
s/X86_FEATURE_CLEAR_CPU_BUF/X86_FEATURE_CLEAR_CPU_BUF_VM/, right?

In case it reveals that I just don't understand ALTERNATIVE_2 at all,
I'm reading this second one as saying:

	if cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO)
		"jz .Lskip_clear_cpu_buffers"
	else if !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)
		"jmp .Lskip_clear_cpu_buffers"

I.e. I'm understanding X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO as mutually
exclusive with X86_FEATURE_CLEAR_CPU_BUF_VM, it means "you _only_ need
to verw MMIO". So basically we moved cpu_buf_vm_clear_mmio_only into a
CPU feature to make it accessible from asm?

(Also let's use BUF instead of BUFFERS in the name, for consistency)
On Thu, Oct 30, 2025 at 04:26:10PM +0000, Brendan Jackman wrote:
> On Thu Oct 30, 2025 at 4:06 PM UTC, Sean Christopherson wrote:
> > On Thu, Oct 30, 2025, Brendan Jackman wrote:
> >> > @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> >> > 	/* Load guest RAX. This kills the @regs pointer! */
> >> > 	mov VCPU_RAX(%_ASM_AX), %_ASM_AX
> >> >
> >> > +	/* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> >> > +	jz .Lskip_clear_cpu_buffers
> >>
> >> Hm, it's a bit weird that we have the "alternative" inside
> >> VM_CLEAR_CPU_BUFFERS, but then we still keep the test+jz
> >> unconditionally.
> >
> > Yeah, I had the same reaction, but couldn't come up with a clean-ish solution
> > and so ignored it :-)
> >
> >> If we really want to super-optimise the no-mitigations-needed case,
> >> shouldn't we want to avoid the conditional in the asm if it never
> >> actually leads to a flush?
> >>
> >> On the other hand, if we don't mind a couple of extra instructions,
> >> shouldn't we be fine with just having the whole asm code based solely
> >> on VMX_RUN_CLEAR_CPU_BUFFERS and leaving the
> >> X86_FEATURE_CLEAR_CPU_BUF_VM to the C code?
> >>
> >> I guess the issue is that in the latter case we'd be back to having
> >> unnecessary inconsistency with AMD code while in the former case... well
> >> that would just be really annoying asm code - am I on the right
> >> wavelength there? So I'm not necessarily asking for changes here, just
> >> probing in case it prompts any interesting insights on your side.
> >>
> >> (Also, maybe this test+jz has a similar cost to the nops that the
> >> "alternative" would inject anyway...?)
> >
> > It's not at all expensive. My bigger objection is that it's hard to follow what's
> > happening.
> >
> > Aha! Idea. IIUC, only the MMIO Stale Data is conditional based on the properties
> > of the vCPU, so we should track _that_ in a KVM_RUN flag. And then if we add yet
> > another X86_FEATURE for MMIO Stale Data flushing (instead of a static branch),
> > this path can use ALTERNATIVE_2. The use of ALTERNATIVE_2 isn't exactly pretty,
> > but IMO this is much more intuitive.
> >
> > diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> > index 004fe1ca89f0..b9651960e069 100644
> > --- a/arch/x86/kvm/vmx/run_flags.h
> > +++ b/arch/x86/kvm/vmx/run_flags.h
> > @@ -4,10 +4,10 @@
> >
> > #define VMX_RUN_VMRESUME_SHIFT 0
> > #define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> > -#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
> > +#define VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT 2
> >
> > #define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> > #define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> > -#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
> > +#define VMX_RUN_CAN_ACCESS_HOST_MMIO BIT(VMX_RUN_CAN_ACCESS_HOST_MMIO_SHIFT)
> >
> > #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> > index ec91f4267eca..50a748b489b4 100644
> > --- a/arch/x86/kvm/vmx/vmenter.S
> > +++ b/arch/x86/kvm/vmx/vmenter.S
> > @@ -137,8 +137,10 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > 	/* Load @regs to RAX. */
> > 	mov (%_ASM_SP), %_ASM_AX
> >
> > -	/* jz .Lskip_clear_cpu_buffers below relies on this */
> > -	test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> > +	/* Check if jz .Lskip_clear_cpu_buffers below relies on this */
> > +	ALTERNATIVE_2 "",
> > +	"", X86_FEATURE_CLEAR_CPU_BUF
> > +	"test $VMX_RUN_CAN_ACCESS_HOST_MMIO, %ebx", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
>
> Er, I don't understand the ALTERNATIVE_2 here, don't we just need this?
>
> 	ALTERNATIVE "" "test $VMX_RUN_CAN_ACCESS_HOST_MMIO, %ebx",
> 		    X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO

Yeah, right.

> > 	/* Check if vmlaunch or vmresume is needed */
> > 	bt $VMX_RUN_VMRESUME_SHIFT, %ebx
> >
> > @@ -163,8 +165,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > 	/* Load guest RAX. This kills the @regs pointer! */
> > 	mov VCPU_RAX(%_ASM_AX), %_ASM_AX
> >
> > -	/* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > -	jz .Lskip_clear_cpu_buffers
> > +	ALTERNATIVE_2 "jmp .Lskip_clear_cpu_buffers",
> > +	"", X86_FEATURE_CLEAR_CPU_BUF
> > +	"jz .Lskip_clear_cpu_buffers", X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO
>
> To fit with the rest of Pawan's code this would need
> s/X86_FEATURE_CLEAR_CPU_BUF/X86_FEATURE_CLEAR_CPU_BUF_VM/, right?

Yes.

> In case it reveals that I just don't understand ALTERNATIVE_2 at all,
> I'm reading this second one as saying:
>
> 	if cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO)
> 		"jz .Lskip_clear_cpu_buffers"
> 	else if !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)
> 		"jmp .Lskip_clear_cpu_buffers"
>
> I.e. I'm understanding X86_FEATURE_CLEAR_CPU_BUFFERS_MMIO as mutually
> exclusive with X86_FEATURE_CLEAR_CPU_BUF_VM, it means "you _only_ need
> to verw MMIO".

Yes, that's also my understanding.

> So basically we moved cpu_buf_vm_clear_mmio_only into a
> CPU feature to make it accessible from asm?

Essentially, yes.

> (Also let's use BUF instead of BUFFERS in the name, for consistency)

Agree.
On Wed, Oct 29, 2025 at 02:27:00PM -0700, Pawan Gupta wrote:
> When a system is only affected by MMIO Stale Data, VERW mitigation is
> currently handled differently than other data sampling attacks like
> MDS/TAA/RFDS, that do the VERW in asm. This is because for MMIO Stale Data,
> VERW is needed only when the guest can access host MMIO, this was tricky to
> check in asm.
>
> Refactoring done by:
>
> 83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
> MMIO into the guest")
>
> now makes it easier to execute VERW conditionally in asm based on
> VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
>
> Unify MMIO Stale Data mitigation with other VERW-based mitigations and only
> have single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
> call to x86_clear_cpu_buffer() in vmx_vcpu_enter_exit().
>
> This also untangles L1D Flush and MMIO Stale Data mitigation. Earlier, an
> L1D Flush would skip the VERW for MMIO Stale Data. Now, both the
> mitigations are independent of each other. Although, this has little
> practical implication since there are no CPUs that are affected by L1TF and
> are *only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS).
> But, this makes the code cleaner and easier to maintain.
>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> ---
> arch/x86/kvm/vmx/run_flags.h | 12 ++++++------
> arch/x86/kvm/vmx/vmenter.S | 5 +++++
> arch/x86/kvm/vmx/vmx.c | 26 ++++++++++----------------
> 3 files changed, 21 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> index 2f20fb170def8b10c8c0c46f7ba751f845c19e2c..004fe1ca89f05524bf3986540056de2caf0abbad 100644
> --- a/arch/x86/kvm/vmx/run_flags.h
> +++ b/arch/x86/kvm/vmx/run_flags.h
> @@ -2,12 +2,12 @@
> #ifndef __KVM_X86_VMX_RUN_FLAGS_H
> #define __KVM_X86_VMX_RUN_FLAGS_H
>
> -#define VMX_RUN_VMRESUME_SHIFT 0
> -#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT 2
> +#define VMX_RUN_VMRESUME_SHIFT 0
> +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> +#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
>
> -#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> -#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
> +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> +#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
>
> #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index 0dd23beae207795484150698d1674dc4044cc520..ec91f4267eca319ffa8e6079887e8dfecc7f96d8 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -137,6 +137,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> /* Load @regs to RAX. */
> mov (%_ASM_SP), %_ASM_AX
>
> + /* jz .Lskip_clear_cpu_buffers below relies on this */
> + test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> +
> /* Check if vmlaunch or vmresume is needed */
> bt $VMX_RUN_VMRESUME_SHIFT, %ebx
>
> @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> /* Load guest RAX. This kills the @regs pointer! */
> mov VCPU_RAX(%_ASM_AX), %_ASM_AX
>
> + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> + jz .Lskip_clear_cpu_buffers
> /* Clobbers EFLAGS.ZF */
> VM_CLEAR_CPU_BUFFERS
> .Lskip_clear_cpu_buffers:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 451be757b3d1b2fec6b2b79157f26dd43bc368b8..303935882a9f8d1d8f81a499cdce1fdc8dad62f0 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -903,9 +903,16 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
> flags |= VMX_RUN_SAVE_SPEC_CTRL;
>
> - if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
> - kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> - flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
> + /*
> + * When affected by MMIO Stale Data only (and not other data sampling
> + * attacks) only clear for MMIO-capable guests.
> + */
> + if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> + if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> + } else {
> + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> + }
Setting the flag here is harmless but not necessary when the CPU is not
affected by any of the data sampling attacks; VM_CLEAR_CPU_BUFFERS would be
a NOP in that case.
However, looking at this code in a year or two, I would be confused about
why the flag is always set on unaffected CPUs. The change below, which sets
the flag conditionally, makes that clearer.
---
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 303935882a9f..0eab59ab2698 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -910,7 +910,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
- } else {
+ } else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)) {
flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
}
On Wed, Oct 29, 2025 at 05:33:46PM +0800, Pawan Gupta wrote:
> On Wed, Oct 29, 2025 at 02:27:00PM -0700, Pawan Gupta wrote:
> > When a system is only affected by MMIO Stale Data, VERW mitigation is
> > currently handled differently than other data sampling attacks like
> > MDS/TAA/RFDS, that do the VERW in asm. This is because for MMIO Stale Data,
> > VERW is needed only when the guest can access host MMIO, this was tricky to
> > check in asm.
> >
> > Refactoring done by:
> >
> > 83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
> > MMIO into the guest")
> >
> > now makes it easier to execute VERW conditionally in asm based on
> > VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
> >
> > Unify MMIO Stale Data mitigation with other VERW-based mitigations and only
> > have single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
> > call to x86_clear_cpu_buffer() in vmx_vcpu_enter_exit().
> >
> > This also untangles L1D Flush and MMIO Stale Data mitigation. Earlier, an
> > L1D Flush would skip the VERW for MMIO Stale Data. Now, both the
> > mitigations are independent of each other. Although, this has little
> > practical implication since there are no CPUs that are affected by L1TF and
> > are *only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS).
> > But, this makes the code cleaner and easier to maintain.
> >
> > Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> > ---
> > arch/x86/kvm/vmx/run_flags.h | 12 ++++++------
> > arch/x86/kvm/vmx/vmenter.S | 5 +++++
> > arch/x86/kvm/vmx/vmx.c | 26 ++++++++++----------------
> > 3 files changed, 21 insertions(+), 22 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> > index 2f20fb170def8b10c8c0c46f7ba751f845c19e2c..004fe1ca89f05524bf3986540056de2caf0abbad 100644
> > --- a/arch/x86/kvm/vmx/run_flags.h
> > +++ b/arch/x86/kvm/vmx/run_flags.h
> > @@ -2,12 +2,12 @@
> > #ifndef __KVM_X86_VMX_RUN_FLAGS_H
> > #define __KVM_X86_VMX_RUN_FLAGS_H
> >
> > -#define VMX_RUN_VMRESUME_SHIFT 0
> > -#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> > -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT 2
> > +#define VMX_RUN_VMRESUME_SHIFT 0
> > +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> > +#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
> >
> > -#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> > -#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> > -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
> > +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> > +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> > +#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
> >
> > #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> > index 0dd23beae207795484150698d1674dc4044cc520..ec91f4267eca319ffa8e6079887e8dfecc7f96d8 100644
> > --- a/arch/x86/kvm/vmx/vmenter.S
> > +++ b/arch/x86/kvm/vmx/vmenter.S
> > @@ -137,6 +137,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > /* Load @regs to RAX. */
> > mov (%_ASM_SP), %_ASM_AX
> >
> > + /* jz .Lskip_clear_cpu_buffers below relies on this */
> > + test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> > +
> > /* Check if vmlaunch or vmresume is needed */
> > bt $VMX_RUN_VMRESUME_SHIFT, %ebx
> >
> > @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > /* Load guest RAX. This kills the @regs pointer! */
> > mov VCPU_RAX(%_ASM_AX), %_ASM_AX
> >
> > + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > + jz .Lskip_clear_cpu_buffers
> > /* Clobbers EFLAGS.ZF */
> > VM_CLEAR_CPU_BUFFERS
> > .Lskip_clear_cpu_buffers:
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 451be757b3d1b2fec6b2b79157f26dd43bc368b8..303935882a9f8d1d8f81a499cdce1fdc8dad62f0 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -903,9 +903,16 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> > if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
> > flags |= VMX_RUN_SAVE_SPEC_CTRL;
> >
> > - if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
> > - kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> > - flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
> > + /*
> > + * When affected by MMIO Stale Data only (and not other data sampling
> > + * attacks) only clear for MMIO-capable guests.
> > + */
> > + if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> > + if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> > + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > + } else {
> > + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > + }
>
> Setting the flag here is harmless but not necessary when the CPU is not
> affected by any of the data sampling attacks. VM_CLEAR_CPU_BUFFERS would be
> a NOP in the case.
>
> However, me looking at this code in a year or two would be confused why the
> flag is always set on unaffected CPUs. Below change to conditionally set
> the flag would make it clearer.
>
> ---
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 303935882a9f..0eab59ab2698 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -910,7 +910,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> - } else {
> + } else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)) {
> flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> }
>
Oh, no need to wait a year or two; I was already confused when I looked at
this part for the first time. But this change makes it clearer to me anyway.
On Thu, Oct 30, 2025 at 01:52:39PM +0800, Yao Yuan wrote:
> On Wed, Oct 29, 2025 at 05:33:46PM +0800, Pawan Gupta wrote:
> > On Wed, Oct 29, 2025 at 02:27:00PM -0700, Pawan Gupta wrote:
> > > When a system is only affected by MMIO Stale Data, VERW mitigation is
> > > currently handled differently than other data sampling attacks like
> > > MDS/TAA/RFDS, that do the VERW in asm. This is because for MMIO Stale Data,
> > > VERW is needed only when the guest can access host MMIO, this was tricky to
> > > check in asm.
> > >
> > > Refactoring done by:
> > >
> > > 83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
> > > MMIO into the guest")
> > >
> > > now makes it easier to execute VERW conditionally in asm based on
> > > VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
> > >
> > > Unify MMIO Stale Data mitigation with other VERW-based mitigations and only
> > > have single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
> > > call to x86_clear_cpu_buffer() in vmx_vcpu_enter_exit().
> > >
> > > This also untangles L1D Flush and MMIO Stale Data mitigation. Earlier, an
> > > L1D Flush would skip the VERW for MMIO Stale Data. Now, both the
> > > mitigations are independent of each other. Although, this has little
> > > practical implication since there are no CPUs that are affected by L1TF and
> > > are *only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS).
> > > But, this makes the code cleaner and easier to maintain.
> > >
> > > Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> > > ---
> > > arch/x86/kvm/vmx/run_flags.h | 12 ++++++------
> > > arch/x86/kvm/vmx/vmenter.S | 5 +++++
> > > arch/x86/kvm/vmx/vmx.c | 26 ++++++++++----------------
> > > 3 files changed, 21 insertions(+), 22 deletions(-)
> > >
> > > diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> > > index 2f20fb170def8b10c8c0c46f7ba751f845c19e2c..004fe1ca89f05524bf3986540056de2caf0abbad 100644
> > > --- a/arch/x86/kvm/vmx/run_flags.h
> > > +++ b/arch/x86/kvm/vmx/run_flags.h
> > > @@ -2,12 +2,12 @@
> > > #ifndef __KVM_X86_VMX_RUN_FLAGS_H
> > > #define __KVM_X86_VMX_RUN_FLAGS_H
> > >
> > > -#define VMX_RUN_VMRESUME_SHIFT 0
> > > -#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> > > -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT 2
> > > +#define VMX_RUN_VMRESUME_SHIFT 0
> > > +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
> > > +#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT 2
> > >
> > > -#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> > > -#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> > > -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
> > > +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
> > > +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> > > +#define VMX_RUN_CLEAR_CPU_BUFFERS BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
> > >
> > > #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> > > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> > > index 0dd23beae207795484150698d1674dc4044cc520..ec91f4267eca319ffa8e6079887e8dfecc7f96d8 100644
> > > --- a/arch/x86/kvm/vmx/vmenter.S
> > > +++ b/arch/x86/kvm/vmx/vmenter.S
> > > @@ -137,6 +137,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > > /* Load @regs to RAX. */
> > > mov (%_ASM_SP), %_ASM_AX
> > >
> > > + /* jz .Lskip_clear_cpu_buffers below relies on this */
> > > + test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
> > > +
> > > /* Check if vmlaunch or vmresume is needed */
> > > bt $VMX_RUN_VMRESUME_SHIFT, %ebx
> > >
> > > @@ -160,6 +163,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
> > > /* Load guest RAX. This kills the @regs pointer! */
> > > mov VCPU_RAX(%_ASM_AX), %_ASM_AX
> > >
> > > + /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
> > > + jz .Lskip_clear_cpu_buffers
> > > /* Clobbers EFLAGS.ZF */
> > > VM_CLEAR_CPU_BUFFERS
> > > .Lskip_clear_cpu_buffers:
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > > index 451be757b3d1b2fec6b2b79157f26dd43bc368b8..303935882a9f8d1d8f81a499cdce1fdc8dad62f0 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -903,9 +903,16 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> > > if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
> > > flags |= VMX_RUN_SAVE_SPEC_CTRL;
> > >
> > > - if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
> > > - kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> > > - flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
> > > + /*
> > > + * When affected by MMIO Stale Data only (and not other data sampling
> > > + * attacks) only clear for MMIO-capable guests.
> > > + */
> > > + if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> > > + if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> > > + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > > + } else {
> > > + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > > + }
> >
> > Setting the flag here is harmless but not necessary when the CPU is not
> > affected by any of the data sampling attacks. VM_CLEAR_CPU_BUFFERS would be
> > a NOP in that case.
> >
> > However, if I look at this code in a year or two, I will be confused about
> > why the flag is always set on unaffected CPUs. The below change to set the
> > flag conditionally would make it clearer.
> >
> > ---
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 303935882a9f..0eab59ab2698 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -910,7 +910,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> > if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> > if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> > flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > - } else {
> > + } else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)) {
> > flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > }
> >
>
> Oh, no need to even wait a year or two -- I was already confused by this
> part the first time I looked at it. But either way, this change makes it
> clearer to me.
:-) I felt the same after sending the patch.
On Wed, Oct 29, 2025, Pawan Gupta wrote:
> When a system is only affected by MMIO Stale Data, VERW mitigation is
> currently handled differently than other data sampling attacks like
> MDS/TAA/RFDS, that do the VERW in asm. This is because for MMIO Stale Data,
> VERW is needed only when the guest can access host MMIO, this was tricky to
> check in asm.
>
> Refactoring done by:
>
> 83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
> MMIO into the guest")
>
> now makes it easier to execute VERW conditionally in asm based on
> VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
>
> Unify MMIO Stale Data mitigation with other VERW-based mitigations and only
> have single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
> call to x86_clear_cpu_buffer() in vmx_vcpu_enter_exit().
>
> This also untangles L1D Flush and MMIO Stale Data mitigation. Earlier, an
> L1D Flush would skip the VERW for MMIO Stale Data. Now, both the
> mitigations are independent of each other. Although, this has little
> practical implication since there are no CPUs that are affected by L1TF and
> are *only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS).
> But, this makes the code cleaner and easier to maintain.
Heh, and largely makes our discussion on the L1TF cleanup moot :-)
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> ---
...
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 451be757b3d1b2fec6b2b79157f26dd43bc368b8..303935882a9f8d1d8f81a499cdce1fdc8dad62f0 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -903,9 +903,16 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
> flags |= VMX_RUN_SAVE_SPEC_CTRL;
>
> - if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
> - kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> - flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
> + /*
> + * When affected by MMIO Stale Data only (and not other data sampling
> + * attacks) only clear for MMIO-capable guests.
> + */
> + if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> + if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> + } else {
> + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> + }
This is quite confusing and subtle. E.g. it requires the reader to know that
cpu_buf_vm_clear_mmio_only is mutually exclusive with X86_FEATURE_CLEAR_CPU_BUF,
and that VMX_RUN_CLEAR_CPU_BUFFERS is ignored if X86_FEATURE_CLEAR_CPU_BUF=n.
At least, I think that's how it works :-)
Isn't the above equivalent to this when all is said and done?
	if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) ||
	    (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
	     kvm_vcpu_can_access_host_mmio(&vmx->vcpu)))
		flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
>
> return flags;
> }
> @@ -7320,21 +7327,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>
> guest_state_enter_irqoff();
>
> - /*
> - * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
> - * mitigation for MDS is done late in VMentry and is still
> - * executed in spite of L1D Flush. This is because an extra VERW
> - * should not matter much after the big hammer L1D Flush.
> - *
> - * cpu_buf_vm_clear is used when system is not vulnerable to MDS/TAA,
> - * and is affected by MMIO Stale Data. In such cases mitigation in only
> - * needed against an MMIO capable guest.
> - */
> if (static_branch_unlikely(&vmx_l1d_should_flush))
> vmx_l1d_flush(vcpu);
> - else if (static_branch_unlikely(&cpu_buf_vm_clear) &&
> - (flags & VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO))
> - x86_clear_cpu_buffers();
>
> vmx_disable_fb_clear(vmx);
On Wed, Oct 29, 2025 at 05:27:37PM -0700, Sean Christopherson wrote:
> On Wed, Oct 29, 2025, Pawan Gupta wrote:
> > When a system is only affected by MMIO Stale Data, VERW mitigation is
> > currently handled differently than other data sampling attacks like
> > MDS/TAA/RFDS, that do the VERW in asm. This is because for MMIO Stale Data,
> > VERW is needed only when the guest can access host MMIO, this was tricky to
> > check in asm.
> >
> > Refactoring done by:
> >
> > 83ebe7157483 ("KVM: VMX: Apply MMIO Stale Data mitigation if KVM maps
> > MMIO into the guest")
> >
> > now makes it easier to execute VERW conditionally in asm based on
> > VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO.
> >
> > Unify MMIO Stale Data mitigation with other VERW-based mitigations and only
> > have single VERW callsite in __vmx_vcpu_run(). Remove the now unnecessary
> > call to x86_clear_cpu_buffer() in vmx_vcpu_enter_exit().
> >
> > This also untangles L1D Flush and MMIO Stale Data mitigation. Earlier, an
> > L1D Flush would skip the VERW for MMIO Stale Data. Now, both the
> > mitigations are independent of each other. Although, this has little
> > practical implication since there are no CPUs that are affected by L1TF and
> > are *only* affected by MMIO Stale Data (i.e. not affected by MDS/TAA/RFDS).
> > But, this makes the code cleaner and easier to maintain.
>
> Heh, and largely makes our discussion on the L1TF cleanup moot :-)
Well, this series is largely a result of that discussion :-)
> > Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> > ---
>
> ...
>
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 451be757b3d1b2fec6b2b79157f26dd43bc368b8..303935882a9f8d1d8f81a499cdce1fdc8dad62f0 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -903,9 +903,16 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> > if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
> > flags |= VMX_RUN_SAVE_SPEC_CTRL;
> >
> > - if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
> > - kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> > - flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
> > + /*
> > + * When affected by MMIO Stale Data only (and not other data sampling
> > + * attacks) only clear for MMIO-capable guests.
> > + */
> > + if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
> > + if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> > + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > + } else {
> > + flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
> > + }
>
> This is quite confusing and subtle.
I realized that and sent the below follow-up almost at the same time:
Setting the flag here is harmless but not necessary when the CPU is not
affected by any of the data sampling attacks. VM_CLEAR_CPU_BUFFERS would be
a NOP in that case.
However, if I look at this code in a year or two, I will be confused about
why the flag is always set on unaffected CPUs. The below change to set the
flag conditionally would make it clearer.
---
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 303935882a9f..0eab59ab2698 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -910,7 +910,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
 	if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
 		if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
 			flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
-	} else {
+	} else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)) {
 		flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
 	}
> E.g. it requires the reader to know that cpu_buf_vm_clear_mmio_only is
> mutually exclusive with X86_FEATURE_CLEAR_CPU_BUF, and that
> VMX_RUN_CLEAR_CPU_BUFFERS is ignored if X86_FEATURE_CLEAR_CPU_BUF=n.
>
> At least, I think that's how it works :-)
That is right; the only thing is that instead of X86_FEATURE_CLEAR_CPU_BUF,
VM_CLEAR_CPU_BUFFERS depends on the KVM-specific X86_FEATURE_CLEAR_CPU_BUF_VM.
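To illustrate why an always-set flag would still be harmless: VM_CLEAR_CPU_BUFFERS
only emits a VERW on CPUs that have that feature bit patched in via alternatives.
Roughly (a simplified sketch, not the literal nospec-branch.h definition; VERW_INSN
stands in for the real VERW expansion):

/*
 * Simplified illustration: the alternative stays empty on CPUs without
 * X86_FEATURE_CLEAR_CPU_BUF_VM, so executing VM_CLEAR_CPU_BUFFERS with a
 * stale VMX_RUN_CLEAR_CPU_BUFFERS flag is a NOP on unaffected CPUs.
 */
#define VM_CLEAR_CPU_BUFFERS \
	ALTERNATIVE "", VERW_INSN, X86_FEATURE_CLEAR_CPU_BUF_VM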
> Isn't the above equivalent to this when all is said and done?
>
> if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) ||
For the above reason, it might be better to use
X86_FEATURE_CLEAR_CPU_BUF_VM (as in the diff I pasted above).
> (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only) &&
> kvm_vcpu_can_access_host_mmio(&vmx->vcpu)))
> flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
>
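For reference, folding the follow-up diff into the patch leaves
__vmx_vcpu_run_flags() with a hunk along these lines (a sketch of the direction
discussed above; whether this shape or the flattened single-condition form wins
out was still open at this point in the thread):

	/*
	 * Sketch: the patch's hunk with the follow-up "else if" applied.  On
	 * systems affected only by MMIO Stale Data, clear CPU buffers just for
	 * MMIO-capable guests; otherwise set the flag only when the VM-entry
	 * VERW mitigation is enabled at all, so unaffected CPUs never carry a
	 * (harmless but confusing) stale flag.
	 */
	if (static_branch_unlikely(&cpu_buf_vm_clear_mmio_only)) {
		if (kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
			flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
	} else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM)) {
		flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
	}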