[PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12

Jim Mattson posted 1 patch 3 weeks, 4 days ago
Documentation/virt/kvm/api.rst  | 10 ++++++++++
arch/x86/include/asm/kvm_host.h |  3 ++-
arch/x86/include/uapi/asm/kvm.h |  1 +
arch/x86/kvm/vmx/nested.c       | 22 ++++++++++++++++++----
4 files changed, 31 insertions(+), 5 deletions(-)
[PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Jim Mattson 3 weeks, 4 days ago
Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
with L2's execution.  The quirk is enabled by default for backwards
compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
consistency with WRMSR(IA32_DEBUGCTL) is desired.

Fixes: 095686e6fcb4 ("KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested VM-Enter")
Signed-off-by: Jim Mattson <jmattson@google.com>
---
 Documentation/virt/kvm/api.rst  | 10 ++++++++++
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/include/uapi/asm/kvm.h |  1 +
 arch/x86/kvm/vmx/nested.c       | 22 ++++++++++++++++++----
 4 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 01a3abef8abb..31019675f2f2 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8414,6 +8414,16 @@ KVM_X86_QUIRK_IGNORE_GUEST_PAT      By default, on Intel platforms, KVM ignores
                                     guest software, for example if it does not
                                     expose a bochs graphics device (which is
                                     known to have had a buggy driver).
+
+KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM
+                                    By default, KVM allows L1 to set FREEZE_IN_SMM
+                                    in vmcs12 when using nested VMX.  When this
+                                    quirk is disabled, KVM does not allow L1 to
+                                    set the bit.  Prior to KVM taking ownership
+                                    of the bit to ensure PMCs are frozen during
+                                    physical SMM, L1 could set FREEZE_IN_SMM in
+                                    vmcs12 to freeze PMCs during physical SMM
+                                    coincident with L2's execution.
 =================================== ============================================
 
 7.32 KVM_CAP_MAX_VCPU_ID
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ecd4019b84b7..80f9806862ab 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2476,7 +2476,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
 	 KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS |	\
 	 KVM_X86_QUIRK_SLOT_ZAP_ALL |		\
 	 KVM_X86_QUIRK_STUFF_FEATURE_MSRS |	\
-	 KVM_X86_QUIRK_IGNORE_GUEST_PAT)
+	 KVM_X86_QUIRK_IGNORE_GUEST_PAT |	\
+	 KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM)
 
 #define KVM_X86_CONDITIONAL_QUIRKS		\
 	(KVM_X86_QUIRK_CD_NW_CLEARED |		\
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 7ceff6583652..2b1c494f3adf 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -476,6 +476,7 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_SLOT_ZAP_ALL		(1 << 7)
 #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS	(1 << 8)
 #define KVM_X86_QUIRK_IGNORE_GUEST_PAT		(1 << 9)
+#define KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM	(1 << 10)
 
 #define KVM_STATE_NESTED_FORMAT_VMX	0
 #define KVM_STATE_NESTED_FORMAT_SVM	1
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0521b55d47a5..bc8f0b3aa70b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3298,10 +3298,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 	if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
 		return -EINVAL;
 
-	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
-	    (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
-	     CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
-		return -EINVAL;
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
+		u64 debugctl = vmcs12->guest_ia32_debugctl;
+
+		/*
+		 * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in
+		 * L2's DEBUGCTL under a quirk for backwards compatibility.
+		 * Prior to KVM taking ownership of the bit to ensure PMCs are
+		 * frozen during physical SMM, L1 could set FREEZE_IN_SMM in
+		 * vmcs12 to freeze PMCs during physical SMM coincident with
+		 * L2's execution.
+		 */
+		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM))
+			debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM;
+
+		if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
+		    CC(!vmx_is_valid_debugctl(vcpu, debugctl, false)))
+			return -EINVAL;
+	}
 
 	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
 	    CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
-- 
2.52.0.457.g6b5491de43-goog
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Sean Christopherson 3 weeks, 4 days ago
On Tue, Jan 13, 2026, Jim Mattson wrote:
> Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> with L2's execution.  The quirk is enabled by default for backwards
> compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> consistency with WRMSR(IA32_DEBUGCTL) is desired.

It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02

	if (vmx->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
		vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
					       vmx_get_supported_debugctl(vcpu, false)); <====
	} else {
		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
		vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
	}

both from a correctness standpoint and so that users aren't misled into thinking
the quirk lets L1 control FREEZE_IN_SMM while running L2.

> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 0521b55d47a5..bc8f0b3aa70b 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -3298,10 +3298,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
>  	if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
>  		return -EINVAL;
>  
> -	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
> -	    (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> -	     CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
> -		return -EINVAL;
> +	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
> +		u64 debugctl = vmcs12->guest_ia32_debugctl;
> +
> +		/*
> +		 * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in
> +		 * L2's DEBUGCTL under a quirk for backwards compatibility.
> +		 * Prior to KVM taking ownership of the bit to ensure PMCs are
> +		 * frozen during physical SMM, L1 could set FREEZE_IN_SMM in
> +		 * vmcs12 to freeze PMCs during physical SMM coincident with
> +		 * L2's execution.
> +		 */
> +		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM))
> +			debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM;
> +
> +		if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> +		    CC(!vmx_is_valid_debugctl(vcpu, debugctl, false)))

I'm mildly tempted to say we should quirk the entire consistency check instead of
limiting it to FREEZE_IN_SMM, purely so that we don't have to add yet another quirk
if a different setup breaks on a different bit.  I suppose we could limit the quirk
to bits that could have been plausibly set in hardware, because otherwise VM-Entry
using L2 would VM-Fail, but that's still quite a few bits.

I'm definitely not opposed to a targeted quirk though.

> +			return -EINVAL;
> +	}
>  
>  	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
>  	    CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
> -- 
> 2.52.0.457.g6b5491de43-goog
>
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Jim Mattson 3 weeks, 4 days ago
On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Jan 13, 2026, Jim Mattson wrote:
> > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > with L2's execution.  The quirk is enabled by default for backwards
> > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > consistency with WRMSR(IA32_DEBUGCTL) is desired.
>
> It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
>
>         if (vmx->nested.nested_run_pending &&
>             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
>                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
>                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
>                                                vmx_get_supported_debugctl(vcpu, false)); <====
>         } else {
>                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
>                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
>         }
>
> both from a correctness standpoint and so that users aren't mislead into thinking
> the quirk lets L1 control of FREEZE_IN_SMM while running L2.

Yes, it's probably worth pointing out that the VM is now subject to
the whims of the L0 administrators.

While that makes some sense for the legacy vPMU, where KVM is just
another client of host perf, perhaps the decision should be revisited
in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
in VMX non-root operation.

> > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > index 0521b55d47a5..bc8f0b3aa70b 100644
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -3298,10 +3298,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
> >       if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
> >               return -EINVAL;
> >
> > -     if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
> > -         (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > -          CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
> > -             return -EINVAL;
> > +     if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
> > +             u64 debugctl = vmcs12->guest_ia32_debugctl;
> > +
> > +             /*
> > +              * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in
> > +              * L2's DEBUGCTL under a quirk for backwards compatibility.
> > +              * Prior to KVM taking ownership of the bit to ensure PMCs are
> > +              * frozen during physical SMM, L1 could set FREEZE_IN_SMM in
> > +              * vmcs12 to freeze PMCs during physical SMM coincident with
> > +              * L2's execution.
> > +              */
> > +             if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM))
> > +                     debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM;
> > +
> > +             if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > +                 CC(!vmx_is_valid_debugctl(vcpu, debugctl, false)))
>
> I'm mildly tempted to say we should quirk the entire consistency check instead of
> limiting it to FREEZE_IN_SMM, purely so that we don't have to add yet another quirk
> if a different setup breaks on a different bit.  I suppose we could limit the quirk
> to bits that could have been plausibly set in hardware, because otherwise VM-Entry
> using L2 would VM-Fail, but that's still quite a few bits.
>
> I'm definitely not opposed to a targeted quirk though.

I have no preference.

> > +                     return -EINVAL;
> > +     }
> >
> >       if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
> >           CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
> > --
> > 2.52.0.457.g6b5491de43-goog
> >
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Jim Mattson 2 weeks, 2 days ago
On Tue, Jan 13, 2026 at 7:47 PM Jim Mattson <jmattson@google.com> wrote:
>
> On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > On Tue, Jan 13, 2026, Jim Mattson wrote:
> > > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > > with L2's execution.  The quirk is enabled by default for backwards
> > > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > > consistency with WRMSR(IA32_DEBUGCTL) is desired.
> >
> > It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
> >
> >         if (vmx->nested.nested_run_pending &&
> >             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> >                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> >                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
> >                                                vmx_get_supported_debugctl(vcpu, false)); <====
> >         } else {
> >                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> >                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
> >         }
> >
> > both from a correctness standpoint and so that users aren't mislead into thinking
> > the quirk lets L1 control of FREEZE_IN_SMM while running L2.
>
> Yes, it's probably worth pointing out that the VM is now subject to
> the whims of the L0 administrators.
>
> While that makes some sense for the legacy vPMU, where KVM is just
> another client of host perf, perhaps the decision should be revisited
> in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
> in VMX non-root operation.
>
> > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > > index 0521b55d47a5..bc8f0b3aa70b 100644
> > > --- a/arch/x86/kvm/vmx/nested.c
> > > +++ b/arch/x86/kvm/vmx/nested.c
> > > @@ -3298,10 +3298,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
> > >       if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
> > >               return -EINVAL;
> > >
> > > -     if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
> > > -         (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > > -          CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
> > > -             return -EINVAL;
> > > +     if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
> > > +             u64 debugctl = vmcs12->guest_ia32_debugctl;
> > > +
> > > +             /*
> > > +              * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in
> > > +              * L2's DEBUGCTL under a quirk for backwards compatibility.
> > > +              * Prior to KVM taking ownership of the bit to ensure PMCs are
> > > +              * frozen during physical SMM, L1 could set FREEZE_IN_SMM in
> > > +              * vmcs12 to freeze PMCs during physical SMM coincident with
> > > +              * L2's execution.
> > > +              */
> > > +             if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM))
> > > +                     debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM;
> > > +
> > > +             if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > > +                 CC(!vmx_is_valid_debugctl(vcpu, debugctl, false)))
> >
> > I'm mildly tempted to say we should quirk the entire consistency check instead of
> > limiting it to FREEZE_IN_SMM, purely so that we don't have to add yet another quirk
> > if a different setup breaks on a different bit.  I suppose we could limit the quirk
> > to bits that could have been plausibly set in hardware, because otherwise VM-Entry
> > using L2 would VM-Fail, but that's still quite a few bits.
> >
> > I'm definitely not opposed to a targeted quirk though.
>
> I have no preference.
>
Sean -

Would you like me to post a v2?
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Sean Christopherson 4 days, 2 hours ago
On Thu, Jan 22, 2026, Jim Mattson wrote:
> On Tue, Jan 13, 2026 at 7:47 PM Jim Mattson <jmattson@google.com> wrote:
> > On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > On Tue, Jan 13, 2026, Jim Mattson wrote:
> > > > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > > > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > > > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > > > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > > > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > > > with L2's execution.  The quirk is enabled by default for backwards
> > > > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > > > consistency with WRMSR(IA32_DEBUGCTL) is desired.
> > >
> > > It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
> > >
> > >         if (vmx->nested.nested_run_pending &&
> > >             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> > >                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > >                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
> > >                                                vmx_get_supported_debugctl(vcpu, false)); <====
> > >         } else {
> > >                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > >                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
> > >         }
> > >
> > > both from a correctness standpoint and so that users aren't mislead into thinking
> > > the quirk lets L1 control of FREEZE_IN_SMM while running L2.
> >
> > Yes, it's probably worth pointing out that the VM is now subject to
> > the whims of the L0 administrators.
> >
> > While that makes some sense for the legacy vPMU, where KVM is just
> > another client of host perf, perhaps the decision should be revisited
> > in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
> > in VMX non-root operation.

Eh, running guests with FREEZE_IN_SMM=0 seems absolutely crazy from a security
perspective.  If an admin wants to disable FREEZE_IN_SMM, they get to keep the
pieces.  And KVM definitely isn't going to override the admin, e.g. to allow the
guest to profile host SMM.

> > > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > > > index 0521b55d47a5..bc8f0b3aa70b 100644
> > > > --- a/arch/x86/kvm/vmx/nested.c
> > > > +++ b/arch/x86/kvm/vmx/nested.c
> > > > @@ -3298,10 +3298,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
> > > >       if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
> > > >               return -EINVAL;
> > > >
> > > > -     if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
> > > > -         (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > > > -          CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
> > > > -             return -EINVAL;
> > > > +     if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
> > > > +             u64 debugctl = vmcs12->guest_ia32_debugctl;
> > > > +
> > > > +             /*
> > > > +              * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in
> > > > +              * L2's DEBUGCTL under a quirk for backwards compatibility.
> > > > +              * Prior to KVM taking ownership of the bit to ensure PMCs are
> > > > +              * frozen during physical SMM, L1 could set FREEZE_IN_SMM in
> > > > +              * vmcs12 to freeze PMCs during physical SMM coincident with
> > > > +              * L2's execution.
> > > > +              */
> > > > +             if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM))
> > > > +                     debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM;
> > > > +
> > > > +             if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > > > +                 CC(!vmx_is_valid_debugctl(vcpu, debugctl, false)))
> > >
> > > I'm mildly tempted to say we should quirk the entire consistency check instead of
> > > limiting it to FREEZE_IN_SMM, purely so that we don't have to add yet another quirk
> > > if a different setup breaks on a different bit.  I suppose we could limit the quirk
> > > to bits that could have been plausibly set in hardware, because otherwise VM-Entry
> > > using L2 would VM-Fail, but that's still quite a few bits.
> > >
> > > I'm definitely not opposed to a targeted quirk though.
> >
> > I have no preference.

After mulling over the options from time to time, I think our best bet is to quirk
only FREEZE_IN_SMM, but very explicitly scope the quirk to just the consistency
check.  E.g. maybe KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM_CC?  That should help alert
readers to the fact that the quirk bypasses the check, but L2 will still see
FREEZE_IN_SMM=0 (e.g. in the unlikely scenario L1 disables interception of
DEBUGCTL).

As for why just FREEZE_IN_SMM, in addition to the fact that FREEZE_IN_SMM is the
only bit that broke anyone (as far as we know, /knock wood), it's also the only
bit that is host-owned.  I.e. unless the host admin likes SMM mucking with things,
skipping the consistency check isn't terrible from a functionality perspective
(KVM doesn't honor the bit for emulated SMM, but that's QEMU's problem :-D).

> Would you like me to post a v2?

Yes please.
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Jim Mattson 3 days, 3 hours ago
On Tue, Feb 3, 2026 at 6:00 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Thu, Jan 22, 2026, Jim Mattson wrote:
> > On Tue, Jan 13, 2026 at 7:47 PM Jim Mattson <jmattson@google.com> wrote:
> > > On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
> > > >
> > > > On Tue, Jan 13, 2026, Jim Mattson wrote:
> > > > > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > > > > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > > > > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > > > > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > > > > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > > > > with L2's execution.  The quirk is enabled by default for backwards
> > > > > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > > > > consistency with WRMSR(IA32_DEBUGCTL) is desired.
> > > >
> > > > It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
> > > >
> > > >         if (vmx->nested.nested_run_pending &&
> > > >             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> > > >                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > > >                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
> > > >                                                vmx_get_supported_debugctl(vcpu, false)); <====
> > > >         } else {
> > > >                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > > >                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
> > > >         }
> > > >
> > > > both from a correctness standpoint and so that users aren't mislead into thinking
> > > > the quirk lets L1 control of FREEZE_IN_SMM while running L2.
> > >
> > > Yes, it's probably worth pointing out that the VM is now subject to
> > > the whims of the L0 administrators.
> > >
> > > While that makes some sense for the legacy vPMU, where KVM is just
> > > another client of host perf, perhaps the decision should be revisited
> > > in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
> > > in VMX non-root operation.
>
> Eh, running guests with FREEZE_IN_SMM=0 seems absolutely crazy from a security
> perspective.  If an admin wants to disable FREEZE_IN_SMM, they get to keep the
> pieces.  And KVM definitely isn't going to override the admin, e.g. to allow the
> guest to profile host SMM.

I'm not sure what you mean by "they get to keep the pieces." What is
the security problem with allowing L1 to freeze *guest-owned* PMCs
during SMM?

> > > > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > > > > index 0521b55d47a5..bc8f0b3aa70b 100644
> > > > > --- a/arch/x86/kvm/vmx/nested.c
> > > > > +++ b/arch/x86/kvm/vmx/nested.c
> > > > > @@ -3298,10 +3298,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
> > > > >       if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
> > > > >               return -EINVAL;
> > > > >
> > > > > -     if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
> > > > > -         (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > > > > -          CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
> > > > > -             return -EINVAL;
> > > > > +     if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
> > > > > +             u64 debugctl = vmcs12->guest_ia32_debugctl;
> > > > > +
> > > > > +             /*
> > > > > +              * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in
> > > > > +              * L2's DEBUGCTL under a quirk for backwards compatibility.
> > > > > +              * Prior to KVM taking ownership of the bit to ensure PMCs are
> > > > > +              * frozen during physical SMM, L1 could set FREEZE_IN_SMM in
> > > > > +              * vmcs12 to freeze PMCs during physical SMM coincident with
> > > > > +              * L2's execution.
> > > > > +              */
> > > > > +             if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM))
> > > > > +                     debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM;
> > > > > +
> > > > > +             if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
> > > > > +                 CC(!vmx_is_valid_debugctl(vcpu, debugctl, false)))
> > > >
> > > > I'm mildly tempted to say we should quirk the entire consistency check instead of
> > > > limiting it to FREEZE_IN_SMM, purely so that we don't have to add yet another quirk
> > > > if a different setup breaks on a different bit.  I suppose we could limit the quirk
> > > > to bits that could have been plausibly set in hardware, because otherwise VM-Entry
> > > > using L2 would VM-Fail, but that's still quite a few bits.
> > > >
> > > > I'm definitely not opposed to a targeted quirk though.
> > >
> > > I have no preference.
>
> After mulling over the options from time to time, I think our best be is to quirk
> only FREEZE_IN_SMM, but very explicity scope the quirk to just the consistency
> check.  E.g. maybe KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM_CC?  That should help alert
> readers to the fact that the quirk bypasses the check, but L2 will still see
> FREEZE_IN_SMM=0 (e.g. in the unlikely scenario L1 disables interception of
> DEBUGCTL).
>
> As for why just FREEZE_IN_SMM, in addition to the fact that FREEZE_IN_SMM is the
> only bit that broke anyone (as far as we know, /knock wood), it's also the only
> bit that is host-owned.  I.e. unless the host admin likes SMM mucking with things,
> skipping the consistency check isn't terrible from a functionality perspective
> (KVM doesn't honor the bit for emulated SMM, but that's QEMU's problem :-D).
>
> > Would you like me to post a v2?
>
> Yes please.
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Sean Christopherson 3 days, 2 hours ago
On Wed, Feb 04, 2026, Jim Mattson wrote:
> On Tue, Feb 3, 2026 at 6:00 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > On Thu, Jan 22, 2026, Jim Mattson wrote:
> > > On Tue, Jan 13, 2026 at 7:47 PM Jim Mattson <jmattson@google.com> wrote:
> > > > On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > >
> > > > > On Tue, Jan 13, 2026, Jim Mattson wrote:
> > > > > > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > > > > > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > > > > > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > > > > > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > > > > > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > > > > > with L2's execution.  The quirk is enabled by default for backwards
> > > > > > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > > > > > consistency with WRMSR(IA32_DEBUGCTL) is desired.
> > > > >
> > > > > It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
> > > > >
> > > > >         if (vmx->nested.nested_run_pending &&
> > > > >             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> > > > >                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > > > >                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
> > > > >                                                vmx_get_supported_debugctl(vcpu, false)); <====
> > > > >         } else {
> > > > >                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > > > >                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
> > > > >         }
> > > > >
> > > > > both from a correctness standpoint and so that users aren't mislead into thinking
> > > > > the quirk lets L1 control of FREEZE_IN_SMM while running L2.
> > > >
> > > > Yes, it's probably worth pointing out that the VM is now subject to
> > > > the whims of the L0 administrators.
> > > >
> > > > While that makes some sense for the legacy vPMU, where KVM is just
> > > > another client of host perf, perhaps the decision should be revisited
> > > > in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
> > > > in VMX non-root operation.
> >
> > Eh, running guests with FREEZE_IN_SMM=0 seems absolutely crazy from a security
> > perspective.  If an admin wants to disable FREEZE_IN_SMM, they get to keep the
> > pieces.  And KVM definitely isn't going to override the admin, e.g. to allow the
> > guest to profile host SMM.
> 
> I'm not sure what you mean by "they get to keep the pieces." What is
> the security problem with allowing L1 to freeze *guest-owned* PMCs
> during SMM?

To give L1 the option to freeze PMCs, KVM would also need to give L1 the option
to *not* freeze PMCs.  At that point, the guest can use its PMCs to profile host
SMM code.  Maybe even leverage a PMI to attack a poorly written SMM handler.

In other words, unless I'm missing something, the only reasonable option is to
run the guest with FREEZE_IN_SMM=1, which means ignoring the guest's wishes.
Or I guess another way to look at it: you can have any color car you want, as
long as it's black :-) 
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Jim Mattson 3 days ago
On Wed, Feb 4, 2026 at 5:18 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Wed, Feb 04, 2026, Jim Mattson wrote:
> > On Tue, Feb 3, 2026 at 6:00 PM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > On Thu, Jan 22, 2026, Jim Mattson wrote:
> > > > On Tue, Jan 13, 2026 at 7:47 PM Jim Mattson <jmattson@google.com> wrote:
> > > > > On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > > >
> > > > > > On Tue, Jan 13, 2026, Jim Mattson wrote:
> > > > > > > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > > > > > > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > > > > > > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > > > > > > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > > > > > > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > > > > > > with L2's execution.  The quirk is enabled by default for backwards
> > > > > > > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > > > > > > consistency with WRMSR(IA32_DEBUGCTL) is desired.
> > > > > >
> > > > > > It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
> > > > > >
> > > > > >         if (vmx->nested.nested_run_pending &&
> > > > > >             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> > > > > >                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > > > > >                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
> > > > > >                                                vmx_get_supported_debugctl(vcpu, false)); <====
> > > > > >         } else {
> > > > > >                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > > > > >                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
> > > > > >         }
> > > > > >
> > > > > > both from a correctness standpoint and so that users aren't mislead into thinking
> > > > > > the quirk lets L1 control of FREEZE_IN_SMM while running L2.
> > > > >
> > > > > Yes, it's probably worth pointing out that the VM is now subject to
> > > > > the whims of the L0 administrators.
> > > > >
> > > > > While that makes some sense for the legacy vPMU, where KVM is just
> > > > > another client of host perf, perhaps the decision should be revisited
> > > > > in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
> > > > > in VMX non-root operation.
> > >
> > > Eh, running guests with FREEZE_IN_SMM=0 seems absolutely crazy from a security
> > > perspective.  If an admin wants to disable FREEZE_IN_SMM, they get to keep the
> > > pieces.  And KVM definitely isn't going to override the admin, e.g. to allow the
> > > guest to profile host SMM.
> >
> > I'm not sure what you mean by "they get to keep the pieces." What is
> > the security problem with allowing L1 to freeze *guest-owned* PMCs
> > during SMM?
>
> To give L1 the option to freeze PMCs, KVM would also need to give L1 the option
> to *not* freeze PMCs.  At that point, the guest can use its PMCs to profile host
> SMM code.  Maybe even leverage a PMI to attack a poorly written SMM handler.

Perhaps I'm missing something. I was thinking, essentially, of a logical or:

vmcs02.debugctl.freeze_in_smm = vmcs12.debugctl.freeze_in_smm |
vmcs01.debugctl.freeze_in_smm

So, an L1 request to freeze counters in SMM would be granted, but an
L1 request to *not* freeze counters could be overruled by the host.

I'm not suggesting this in the context of the legacy vPMU, because
some PMCs may be counting host-initiated perf events, and L1 should
not have any say in what those PMCs count. However, with the mediated
vPMU, L1 owns the entire PMU while L2 is running, so it seems
reasonable to allow it to freeze the counters during physical SMM.

> In other words, unless I'm missing something, the only reasonable option is to
> run the guest with FREEZE_IN_SMM=1, which means ignoring the guest's wishes.
> Or I guess another way to look at it: you can have any color car you want, as
> long as it's black :-)

I would be happy with FREEZE_IN_SMM=1. I'm not happy with the host
dictating FREEZE_IN_SMM=0.
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Sean Christopherson 2 days, 13 hours ago
On Wed, Feb 04, 2026, Jim Mattson wrote:
> On Wed, Feb 4, 2026 at 5:18 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > On Wed, Feb 04, 2026, Jim Mattson wrote:
> > > On Tue, Feb 3, 2026 at 6:00 PM Sean Christopherson <seanjc@google.com> wrote:
> > > >
> > > > On Thu, Jan 22, 2026, Jim Mattson wrote:
> > > > > On Tue, Jan 13, 2026 at 7:47 PM Jim Mattson <jmattson@google.com> wrote:
> > > > > > On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > > > >
> > > > > > > On Tue, Jan 13, 2026, Jim Mattson wrote:
> > > > > > > > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > > > > > > > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > > > > > > > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > > > > > > > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > > > > > > > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > > > > > > > with L2's execution.  The quirk is enabled by default for backwards
> > > > > > > > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > > > > > > > consistency with WRMSR(IA32_DEBUGCTL) is desired.
> > > > > > >
> > > > > > > It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
> > > > > > >
> > > > > > >         if (vmx->nested.nested_run_pending &&
> > > > > > >             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> > > > > > >                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > > > > > >                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
> > > > > > >                                                vmx_get_supported_debugctl(vcpu, false)); <====
> > > > > > >         } else {
> > > > > > >                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > > > > > >                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
> > > > > > >         }
> > > > > > >
> > > > > > > both from a correctness standpoint and so that users aren't mislead into thinking
> > > > > > > the quirk lets L1 control of FREEZE_IN_SMM while running L2.
> > > > > >
> > > > > > Yes, it's probably worth pointing out that the VM is now subject to
> > > > > > the whims of the L0 administrators.
> > > > > >
> > > > > > While that makes some sense for the legacy vPMU, where KVM is just
> > > > > > another client of host perf, perhaps the decision should be revisited
> > > > > > in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
> > > > > > in VMX non-root operation.
> > > >
> > > > Eh, running guests with FREEZE_IN_SMM=0 seems absolutely crazy from a security
> > > > perspective.  If an admin wants to disable FREEZE_IN_SMM, they get to keep the
> > > > pieces.  And KVM definitely isn't going to override the admin, e.g. to allow the
> > > > guest to profile host SMM.
> > >
> > > I'm not sure what you mean by "they get to keep the pieces." What is
> > > the security problem with allowing L1 to freeze *guest-owned* PMCs
> > > during SMM?
> >
> > To give L1 the option to freeze PMCs, KVM would also need to give L1 the option
> > to *not* freeze PMCs.  At that point, the guest can use its PMCs to profile host
> > SMM code.  Maybe even leverage a PMI to attack a poorly written SMM handler.
> 
> Perhaps I'm missing something. I was thinking, essentially, of a logical or:
> 
> vmcs02.debugctl.freeze_in_smm = vmcs12.debugctl.freeze_in_smm |
> vmcs01.debugctl.freeze_in_smm
> 
> So, an L1 request to freeze counters in SMM would be granted, but an
> L1 request to *not* freeze counters could be overruled by the host.

/facepalm

Sorry, I misunderstood what you were suggesting.  Not sure how, it's super obvious,
at least in hindsight.

> I'm not suggesting this in the context of the legacy vPMU, because
> some PMCs may be counting host-initiated perf events, and L1 should
> not have any say in what those PMCs count. However, with the mediated
> vPMU, L1 owns the entire PMU while L2 is running, so it seems
> reasonable to allow it to freeze the counters during physical SMM.

Agreed.

> > In other words, unless I'm missing something, the only reasonable option is to
> > run the guest with FREEZE_IN_SMM=1, which means ignoring the guest's wishes.
> > Or I guess another way to look at it: you can have any color car you want, as
> > long as it's black :-)
> 
> I would be happy with FREEZE_IN_SMM=1. I'm not happy with the host
> dictating FREEZE_IN_SMM=0.

Yep, makes sense.
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Jim Mattson 2 days, 10 hours ago
On Thu, Feb 5, 2026 at 6:47 AM Sean Christopherson <seanjc@google.com> wrote:
>
> On Wed, Feb 04, 2026, Jim Mattson wrote:
> > On Wed, Feb 4, 2026 at 5:18 PM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > On Wed, Feb 04, 2026, Jim Mattson wrote:
> > > > On Tue, Feb 3, 2026 at 6:00 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > >
> > > > > On Thu, Jan 22, 2026, Jim Mattson wrote:
> > > > > > On Tue, Jan 13, 2026 at 7:47 PM Jim Mattson <jmattson@google.com> wrote:
> > > > > > > On Tue, Jan 13, 2026 at 4:42 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > > > > >
> > > > > > > > On Tue, Jan 13, 2026, Jim Mattson wrote:
> > > > > > > > > Add KVM_X86_QUIRK_VMCS12_FREEZE_IN_SMM to allow L1 to set
> > > > > > > > > IA32_DEBUGCTL.FREEZE_IN_SMM in vmcs12 when using nested VMX.  Prior to
> > > > > > > > > commit 6b1dd26544d0 ("KVM: VMX: Preserve host's
> > > > > > > > > DEBUGCTLMSR_FREEZE_IN_SMM while running the guest"), L1 could set
> > > > > > > > > FREEZE_IN_SMM in vmcs12 to freeze PMCs during physical SMM coincident
> > > > > > > > > with L2's execution.  The quirk is enabled by default for backwards
> > > > > > > > > compatibility; userspace can disable it via KVM_CAP_DISABLE_QUIRKS2 if
> > > > > > > > > consistency with WRMSR(IA32_DEBUGCTL) is desired.
> > > > > > > >
> > > > > > > > It's probably worth calling out that KVM will still drop FREEZE_IN_SMM in vmcs02
> > > > > > > >
> > > > > > > >         if (vmx->nested.nested_run_pending &&
> > > > > > > >             (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> > > > > > > >                 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > > > > > > >                 vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
> > > > > > > >                                                vmx_get_supported_debugctl(vcpu, false)); <====
> > > > > > > >         } else {
> > > > > > > >                 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > > > > > > >                 vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
> > > > > > > >         }
> > > > > > > >
> > > > > > > > both from a correctness standpoint and so that users aren't mislead into thinking
> > > > > > > > the quirk lets L1 control of FREEZE_IN_SMM while running L2.
> > > > > > >
> > > > > > > Yes, it's probably worth pointing out that the VM is now subject to
> > > > > > > the whims of the L0 administrators.
> > > > > > >
> > > > > > > While that makes some sense for the legacy vPMU, where KVM is just
> > > > > > > another client of host perf, perhaps the decision should be revisited
> > > > > > > in the case of the MPT vPMU, where KVM owns the PMU while the vCPU is
> > > > > > > in VMX non-root operation.
> > > > >
> > > > > Eh, running guests with FREEZE_IN_SMM=0 seems absolutely crazy from a security
> > > > > perspective.  If an admin wants to disable FREEZE_IN_SMM, they get to keep the
> > > > > pieces.  And KVM definitely isn't going to override the admin, e.g. to allow the
> > > > > guest to profile host SMM.
> > > >
> > > > I'm not sure what you mean by "they get to keep the pieces." What is
> > > > the security problem with allowing L1 to freeze *guest-owned* PMCs
> > > > during SMM?
> > >
> > > To give L1 the option to freeze PMCs, KVM would also need to give L1 the option
> > > to *not* freeze PMCs.  At that point, the guest can use its PMCs to profile host
> > > SMM code.  Maybe even leverage a PMI to attack a poorly written SMM handler.
> >
> > Perhaps I'm missing something. I was thinking, essentially, of a logical or:
> >
> > vmcs02.debugctl.freeze_in_smm = vmcs12.debugctl.freeze_in_smm |
> > vmcs01.debugctl.freeze_in_smm
> >
> > So, an L1 request to freeze counters in SMM would be granted, but an
> > L1 request to *not* freeze counters could be overruled by the host.
>
> /facepalm
>
> Sorry, I misunderstood what you were suggesting.  Not sure how, it's super obvious,
> at least in hindsight.

My bad. I should have been more explicit (or maybe I should have just
omitted the aside).

> > I'm not suggesting this in the context of the legacy vPMU, because
> > some PMCs may be counting host-initiated perf events, and L1 should
> > not have any say in what those PMCs count. However, with the mediated
> > vPMU, L1 owns the entire PMU while L2 is running, so it seems
> > reasonable to allow it to freeze the counters during physical SMM.
>
> Agreed.
>
> > > In other words, unless I'm missing something, the only reasonable option is to
> > > run the guest with FREEZE_IN_SMM=1, which means ignoring the guest's wishes.
> > > Or I guess another way to look at it: you can have any color car you want, as
> > > long as it's black :-)
> >
> > I would be happy with FREEZE_IN_SMM=1. I'm not happy with the host
> > dictating FREEZE_IN_SMM=0.
>
> Yep, make sense.

Perhaps we should ignore both L0 and L1, and arbitrarily set
FREEZE_IN_SMM=1 for both vmcs01 and vmcs02 when MPT is enabled. But, I
don't think that discussion should block the resolution of this quirk.
I'll try to send v2 out later today.
Re: [PATCH] KVM: VMX: Add quirk to allow L1 to set FREEZE_IN_SMM in vmcs12
Posted by Sean Christopherson 2 days, 9 hours ago
On Thu, Feb 05, 2026, Jim Mattson wrote:
> On Thu, Feb 5, 2026 at 6:47 AM Sean Christopherson <seanjc@google.com> wrote:
> > > > In other words, unless I'm missing something, the only reasonable option is to
> > > > run the guest with FREEZE_IN_SMM=1, which means ignoring the guest's wishes.
> > > > Or I guess another way to look at it: you can have any color car you want, as
> > > > long as it's black :-)
> > >
> > > I would be happy with FREEZE_IN_SMM=1. I'm not happy with the host
> > > dictating FREEZE_IN_SMM=0.
> >
> > Yep, make sense.
> 
> Perhaps we should ignore both L0 and L1, and arbitrarily set
> FREEZE_IN_SMM=1 for both vmcs01 and vmcs02 when MPT is enabled. 

Hmm, I like that idea even more, because it's waaay simpler to implement.  Argh,
the wrinkle is that KVM doesn't actually know if DEBUGCTLMSR_FREEZE_IN_SMM is
supported.  Oh, nice, it's reported in PERF_CAPABILITIES.

  IA32_DEBUGCTL.FREEZE_WHILE_SMM is supported if
  IA32_PERF_CAPABILITIES.FREEZE_WHILE_SMM[Bit 12] is reporting 1

Arguably, this is a fix for mediated PMU support.  Because as you pointed out,
we can freeze PMCs on SMI for mediated vPMUs without impacting host profiling,
unlike the legacy vPMU where it being a weird extension of perf means we can't
deny guest profiling without breaking host perf usage.

This? (untested)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d3566bb1a93..5563f68158bb 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -324,6 +324,7 @@
 #define PERF_CAP_PEBS_TRAP             BIT_ULL(6)
 #define PERF_CAP_ARCH_REG              BIT_ULL(7)
 #define PERF_CAP_PEBS_FORMAT           0xf00
+#define PERF_CAP_FREEZE_IN_SMM         BIT_ULL(12)
 #define PERF_CAP_FW_WRITES             BIT_ULL(13)
 #define PERF_CAP_PEBS_BASELINE         BIT_ULL(14)
 #define PERF_CAP_PEBS_TIMING_INFO      BIT_ULL(17)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 27acafd03381..ef0d8108ff42 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8119,13 +8119,12 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 static __init u64 vmx_get_perf_capabilities(void)
 {
        u64 perf_cap = PERF_CAP_FW_WRITES;
-       u64 host_perf_cap = 0;
 
        if (!enable_pmu)
                return 0;
 
        if (boot_cpu_has(X86_FEATURE_PDCM))
-               rdmsrq(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
+               rdmsrq(MSR_IA32_PERF_CAPABILITIES, kvm_host.perf_capabilities);
 
        if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR) &&
            !enable_mediated_pmu) {
@@ -8139,11 +8138,11 @@ static __init u64 vmx_get_perf_capabilities(void)
                if (!vmx_lbr_caps.has_callstack)
                        memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
                else if (vmx_lbr_caps.nr)
-                       perf_cap |= host_perf_cap & PERF_CAP_LBR_FMT;
+                       perf_cap |= kvm_host.perf_capabilities & PERF_CAP_LBR_FMT;
        }
 
        if (vmx_pebs_supported()) {
-               perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
+               perf_cap |= kvm_host.perf_capabilities & PERF_CAP_PEBS_MASK;
 
                /*
                 * Disallow adaptive PEBS as it is functionally broken, can be
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 70bfe81dea54..e780d0e06b61 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -408,6 +408,11 @@ static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
        WARN_ON_ONCE(val & VMX_HOST_OWNED_DEBUGCTL_BITS);
 
        val |= vcpu->arch.host_debugctl & VMX_HOST_OWNED_DEBUGCTL_BITS;
+
+       if (kvm_vcpu_has_mediated_pmu(vcpu) &&
+           (kvm_host.perf_capabilities & PERF_CAP_FREEZE_IN_SMM))
+               val |= DEBUGCTLMSR_FREEZE_IN_SMM;
+
        vmcs_write64(GUEST_IA32_DEBUGCTL, val);
 }
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 70e81f008030..e0084e1063d0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -52,6 +52,7 @@ struct kvm_host_values {
        u64 xss;
        u64 s_cet;
        u64 arch_capabilities;
+       u64 perf_capabilities;
 };
 
 void kvm_spurious_fault(void);