[PATCH v4 4/4] x86: KVM: VMX: preserve DEBUGCTLMSR_FREEZE_IN_SMM

Maxim Levitsky posted 4 patches 7 months ago
There is a newer version of this series
[PATCH v4 4/4] x86: KVM: VMX: preserve DEBUGCTLMSR_FREEZE_IN_SMM
Posted by Maxim Levitsky 7 months ago
Pass through the host's DEBUGCTL.DEBUGCTLMSR_FREEZE_IN_SMM to the guest
GUEST_IA32_DEBUGCTL without the guest seeing this value.

Since the value of the host DEBUGCTL can in theory change between VM runs,
check if has changed, and if yes, then reload the GUEST_IA32_DEBUGCTL with
the new value.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/nested.c       |  4 ++--
 arch/x86/kvm/vmx/vmx.c          | 22 +++++++++++++++++++---
 arch/x86/kvm/vmx/vmx.h          |  2 ++
 arch/x86/kvm/x86.c              |  7 +++++--
 5 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d2ad31a1628e..2e7e4a8b392e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1673,6 +1673,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
 enum kvm_x86_run_flags {
 	KVM_RUN_FORCE_IMMEDIATE_EXIT	= BIT(0),
 	KVM_RUN_LOAD_GUEST_DR6		= BIT(1),
+	KVM_RUN_LOAD_DEBUGCTL		= BIT(2),
 };
 
 struct kvm_x86_ops {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0bda6400e30a..0a572356119f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2653,7 +2653,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
 		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
-		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+		vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl);
 	} else {
 		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
 		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
@@ -4792,7 +4792,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
 
 	kvm_set_dr(vcpu, 7, 0x400);
-	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+	vmx_guest_debugctl_write(vcpu, 0);
 
 	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
 				vmcs12->vm_exit_msr_load_count))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9046ee2e9a04..c70fe7cbede6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2154,7 +2154,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
-		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+		msr_info->data = vmx_guest_debugctl_read();
 		break;
 	default:
 	find_uret_msr:
@@ -2194,6 +2194,17 @@ u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
 	return debugctl;
 }
 
+void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
+{
+	val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM;
+	vmcs_write64(GUEST_IA32_DEBUGCTL, val);
+}
+
+u64 vmx_guest_debugctl_read(void)
+{
+	return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM;
+}
+
 /*
  * Writes msr value into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -2279,7 +2290,8 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 						VM_EXIT_SAVE_DEBUG_CONTROLS)
 			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
 
-		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+		vmx_guest_debugctl_write(vcpu, data);
+
 		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
 		    (data & DEBUGCTLMSR_LBR))
 			intel_pmu_create_guest_lbr_event(vcpu);
@@ -4795,7 +4807,8 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 	vmcs_write32(GUEST_SYSENTER_CS, 0);
 	vmcs_writel(GUEST_SYSENTER_ESP, 0);
 	vmcs_writel(GUEST_SYSENTER_EIP, 0);
-	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+	vmx_guest_debugctl_write(&vmx->vcpu, 0);
 
 	if (cpu_has_vmx_tpr_shadow()) {
 		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
@@ -7368,6 +7381,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
 		set_debugreg(vcpu->arch.dr6, 6);
 
+	if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
+		vmx_guest_debugctl_write(vcpu, vmx_guest_debugctl_read());
+
 	/*
 	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
 	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 1b80479505d3..5ddedf73392b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -416,6 +416,8 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
 
 void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
+void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val);
+u64 vmx_guest_debugctl_read(void);
 
 /*
  * Note, early Intel manuals have the write-low and read-high bitmap offsets
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 684b8047e0f2..a85078dfa36d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10752,7 +10752,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		dm_request_for_irq_injection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
 	fastpath_t exit_fastpath;
-	u64 run_flags;
+	u64 run_flags, host_debug_ctl;
 
 	bool req_immediate_exit = false;
 
@@ -11024,7 +11024,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		set_debugreg(0, 7);
 	}
 
-	vcpu->arch.host_debugctl = get_debugctlmsr();
+	host_debug_ctl = get_debugctlmsr();
+	if (host_debug_ctl != vcpu->arch.host_debugctl)
+		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
+	vcpu->arch.host_debugctl = host_debug_ctl;
 
 	guest_timing_enter_irqoff();
 
-- 
2.46.0
Re: [PATCH v4 4/4] x86: KVM: VMX: preserve DEBUGCTLMSR_FREEZE_IN_SMM
Posted by Sean Christopherson 6 months, 4 weeks ago
KVM: VMX:

On Wed, May 14, 2025, Maxim Levitsky wrote:
> Pass through the host's DEBUGCTL.DEBUGCTLMSR_FREEZE_IN_SMM to the guest
> GUEST_IA32_DEBUGCTL without the guest seeing this value.
> 
> Since the value of the host DEBUGCTL can in theory change between VM runs,
> check if has changed, and if yes, then reload the GUEST_IA32_DEBUGCTL with
> the new value.

Please split this into two patches.  Add vmx_guest_debugctl_{read,write}(), then
land the FREEZE_IN_SMM change on top.  Adding the helpers should be a nop and
thus trivial to review, and similarly the DEBUGCTLMSR_FREEZE_IN_SMM change is
actually pretty small.  But combined, this patch is annoying to review because
there's a lot of uninteresting diff to wade through to get at the FREEZE_IN_SMM
logic.

> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/vmx/nested.c       |  4 ++--
>  arch/x86/kvm/vmx/vmx.c          | 22 +++++++++++++++++++---
>  arch/x86/kvm/vmx/vmx.h          |  2 ++
>  arch/x86/kvm/x86.c              |  7 +++++--
>  5 files changed, 29 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d2ad31a1628e..2e7e4a8b392e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1673,6 +1673,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
>  enum kvm_x86_run_flags {
>  	KVM_RUN_FORCE_IMMEDIATE_EXIT	= BIT(0),
>  	KVM_RUN_LOAD_GUEST_DR6		= BIT(1),
> +	KVM_RUN_LOAD_DEBUGCTL		= BIT(2),
>  };
>  
>  struct kvm_x86_ops {

...

> @@ -7368,6 +7381,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
>  	if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
>  		set_debugreg(vcpu->arch.dr6, 6);
>  
> +	if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
> +		vmx_guest_debugctl_write(vcpu, vmx_guest_debugctl_read());
> +
>  	/*
>  	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
>  	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 1b80479505d3..5ddedf73392b 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -416,6 +416,8 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
>  
>  void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
>  u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
> +void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val);
> +u64 vmx_guest_debugctl_read(void);

I vote to make these static inlines, I don't see any reason to bury them in vmx.c

>  /*
>   * Note, early Intel manuals have the write-low and read-high bitmap offsets
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 684b8047e0f2..a85078dfa36d 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10752,7 +10752,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		dm_request_for_irq_injection(vcpu) &&
>  		kvm_cpu_accept_dm_intr(vcpu);
>  	fastpath_t exit_fastpath;
> -	u64 run_flags;
> +	u64 run_flags, host_debug_ctl;
>  
>  	bool req_immediate_exit = false;
>  
> @@ -11024,7 +11024,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		set_debugreg(0, 7);
>  	}
>  
> -	vcpu->arch.host_debugctl = get_debugctlmsr();
> +	host_debug_ctl = get_debugctlmsr();

This can probably just be debug_ctl to shorten the lines, I don't see a strong
need to clarify it's the host's value since all accesses are clustered together.

> +	if (host_debug_ctl != vcpu->arch.host_debugctl)
> +		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
> +	vcpu->arch.host_debugctl = host_debug_ctl;

Argh, the TDX series didn't get refreshed (or maybe it got poorly rebased), and
now there's a redundant and confusing "host_debugctlmsr" field in vcpu_vt.  Can
you slot in the below?  It's not urgent enough to warrant posting separately,
and handling TDX in this series would get a bit wonky if TDX uses a different
snapshot.

The reason I say that TDX will get wonky is also why I think the "are bits
changing?" check in x86.c needs to be precise.  KVM_RUN_LOAD_DEBUGCTL should
*never* be set for TDX and SVM, and so they should WARN instead of silently
doing nothing.  But to do that without generating false positives, the common
check needs to be precise.

I was going to say we could throw a mask in kvm_x86_ops, but TDX throws a wrench
in that idea.  Aha!  Actually, we can still use kvm_x86_ops.  TDX can be exempted
via guest_state_protected.  E.g. in common x86:

	debug_ctl = get_debugctlmsr();
	if (((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_DEBUGCTL_MASK) &&
	    !vcpu->arch.guest_state_protected)
		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
	vcpu->arch.host_debugctl = debug_ctl;

--
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 20 May 2025 15:37:41 -0700
Subject: [PATCH] KVM: TDX: Use kvm_arch_vcpu.host_debugctl to restore the
 host's DEBUGCTL

Use the kvm_arch_vcpu.host_debugctl snapshot to restore DEBUGCTL after
running a TD vCPU.  The final TDX series rebase was mishandled, likely due
to commit fb71c7959356 ("KVM: x86: Snapshot the host's DEBUGCTL in common
x86") deleting the same line of code from vmx.h, i.e. creating a semantic
conflict of sorts, but no syntactic conflict.

Using the version in kvm_vcpu_arch picks up the ulong => u64 fix (which
isn't relevant to TDX) as well as the IRQ fix from commit 189ecdb3e112
("KVM: x86: Snapshot the host's DEBUGCTL after disabling IRQs").

Link: https://lore.kernel.org/all/20250307212053.2948340-10-pbonzini@redhat.com
Cc: Adrian Hunter <adrian.hunter@intel.com>
Fixes: 8af099037527 ("KVM: TDX: Save and restore IA32_DEBUGCTL")
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/common.h | 2 --
 arch/x86/kvm/vmx/tdx.c    | 6 ++----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index 8f46a06e2c44..66454bead202 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -53,8 +53,6 @@ struct vcpu_vt {
 #ifdef CONFIG_X86_64
 	u64		msr_host_kernel_gs_base;
 #endif
-
-	unsigned long	host_debugctlmsr;
 };
 
 #ifdef CONFIG_KVM_INTEL_TDX
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 7dbfad28debc..84b2922b8119 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -778,8 +778,6 @@ void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	else
 		vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
 
-	vt->host_debugctlmsr = get_debugctlmsr();
-
 	vt->guest_state_loaded = true;
 }
 
@@ -1056,8 +1054,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 
 	tdx_vcpu_enter_exit(vcpu);
 
-	if (vt->host_debugctlmsr & ~TDX_DEBUGCTL_PRESERVED)
-		update_debugctlmsr(vt->host_debugctlmsr);
+	if (vcpu->arch.host_debugctl & ~TDX_DEBUGCTL_PRESERVED)
+		update_debugctlmsr(vcpu->arch.host_debugctl);
 
 	tdx_load_host_xsave_state(vcpu);
 	tdx->guest_entered = true;

base-commit: 475a02020ac2de6b10e85de75e79833139b556e0
--
Re: [PATCH v4 4/4] x86: KVM: VMX: preserve DEBUGCTLMSR_FREEZE_IN_SMM
Posted by mlevitsk@redhat.com 6 months, 4 weeks ago
On Tue, 2025-05-20 at 15:57 -0700, Sean Christopherson wrote:
> KVM: VMX:
> 
> On Wed, May 14, 2025, Maxim Levitsky wrote:
> > Pass through the host's DEBUGCTL.DEBUGCTLMSR_FREEZE_IN_SMM to the guest
> > GUEST_IA32_DEBUGCTL without the guest seeing this value.
> > 
> > Since the value of the host DEBUGCTL can in theory change between VM runs,
> > check if has changed, and if yes, then reload the GUEST_IA32_DEBUGCTL with
> > the new value.
> 
> Please split this into two patches.  Add vmx_guest_debugctl_{read,write}(), then
> land the FREEZE_IN_SMM change on top.  Adding the helpers should be a nop and
> thus trivial to review, and similarly the DEBUGCTLMSR_FREEZE_IN_SMM change is
> actually pretty small.  But combined, this patch is annoying to review because
> there's a lot of uninteresting diff to wade through to get at the FREEZE_IN_SMM
> logic.

No problem.

> 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  1 +
> >  arch/x86/kvm/vmx/nested.c       |  4 ++--
> >  arch/x86/kvm/vmx/vmx.c          | 22 +++++++++++++++++++---
> >  arch/x86/kvm/vmx/vmx.h          |  2 ++
> >  arch/x86/kvm/x86.c              |  7 +++++--
> >  5 files changed, 29 insertions(+), 7 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index d2ad31a1628e..2e7e4a8b392e 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1673,6 +1673,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
> >  enum kvm_x86_run_flags {
> >  	KVM_RUN_FORCE_IMMEDIATE_EXIT	= BIT(0),
> >  	KVM_RUN_LOAD_GUEST_DR6		= BIT(1),
> > +	KVM_RUN_LOAD_DEBUGCTL		= BIT(2),
> >  };
> >  
> >  struct kvm_x86_ops {
> 
> ...
> 
> > @@ -7368,6 +7381,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
> >  	if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
> >  		set_debugreg(vcpu->arch.dr6, 6);
> >  
> > +	if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
> > +		vmx_guest_debugctl_write(vcpu, vmx_guest_debugctl_read());
> > +
> >  	/*
> >  	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
> >  	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
> > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> > index 1b80479505d3..5ddedf73392b 100644
> > --- a/arch/x86/kvm/vmx/vmx.h
> > +++ b/arch/x86/kvm/vmx/vmx.h
> > @@ -416,6 +416,8 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
> >  
> >  void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
> >  u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
> > +void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val);
> > +u64 vmx_guest_debugctl_read(void);
> 
> I vote to make these static inlines, I don't see any reason to bury them in vmx.c

No problem as well.

> 
> >  /*
> >   * Note, early Intel manuals have the write-low and read-high bitmap offsets
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 684b8047e0f2..a85078dfa36d 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -10752,7 +10752,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  		dm_request_for_irq_injection(vcpu) &&
> >  		kvm_cpu_accept_dm_intr(vcpu);
> >  	fastpath_t exit_fastpath;
> > -	u64 run_flags;
> > +	u64 run_flags, host_debug_ctl;
> >  
> >  	bool req_immediate_exit = false;
> >  
> > @@ -11024,7 +11024,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  		set_debugreg(0, 7);
> >  	}
> >  
> > -	vcpu->arch.host_debugctl = get_debugctlmsr();
> > +	host_debug_ctl = get_debugctlmsr();
> 
> This can probably just be debug_ctl to shorten the lines, I don't see a strong
> need to clarify it's the host's value since all accesses are clustered together.
> 
> > +	if (host_debug_ctl != vcpu->arch.host_debugctl)
> > +		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
> > +	vcpu->arch.host_debugctl = host_debug_ctl;
> 
> Argh, the TDX series didn't get refreshed (or maybe it got poorly rebased), and
> now there's a redundant and confusing "host_debugctlmsr" field in vcpu_vt.  Can
> you slot in the below?  It's not urgent enough to warrant posting separately,
> and handling TDX in this series would get a bit wonky if TDX uses a different
> snapshot.
> 
> The reason I say that TDX will get wonky is also why I think the "are bits
> changing?" check in x86.c needs to be precise.  KVM_RUN_LOAD_DEBUGCTL should
> *never* be set for TDX and SVM, and so they should WARN instead of silently
> doing nothing.  But to do that without generating false positives, the common
> check needs to be precise.
> 
> I was going to say we could throw a mask in kvm_x86_ops, but TDX throws a wrench
> in that idea.  Aha!  Actually, we can still use kvm_x86_ops.  TDX can be exempted
> via guest_state_protected.  E.g. in common x86:
> 
> 	debug_ctl = get_debugctlmsr();
> 	if (((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_DEBUGCTL_MASK) &&
> 	    !vcpu->arch.guest_state_protected)
> 		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
> 	vcpu->arch.host_debugctl = debug_ctl;

Makes sense. I need to remember that TDX has arrived so I have to take it
in the account from now on.

All right, thanks for the review, I will post a new patch series soon.


Best regards,
	Maxim Levitsky

> 
> --
> From: Sean Christopherson <seanjc@google.com>
> Date: Tue, 20 May 2025 15:37:41 -0700
> Subject: [PATCH] KVM: TDX: Use kvm_arch_vcpu.host_debugctl to restore the
>  host's DEBUGCTL
> 
> Use the kvm_arch_vcpu.host_debugctl snapshot to restore DEBUGCTL after
> running a TD vCPU.  The final TDX series rebase was mishandled, likely due
> to commit fb71c7959356 ("KVM: x86: Snapshot the host's DEBUGCTL in common
> x86") deleting the same line of code from vmx.h, i.e. creating a semantic
> conflict of sorts, but no syntactic conflict.
> 
> Using the version in kvm_vcpu_arch picks up the ulong => u64 fix (which
> isn't relevant to TDX) as well as the IRQ fix from commit 189ecdb3e112
> ("KVM: x86: Snapshot the host's DEBUGCTL after disabling IRQs").
> 
> Link: https://lore.kernel.org/all/20250307212053.2948340-10-pbonzini@redhat.com
> Cc: Adrian Hunter <adrian.hunter@intel.com>
> Fixes: 8af099037527 ("KVM: TDX: Save and restore IA32_DEBUGCTL")
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/vmx/common.h | 2 --
>  arch/x86/kvm/vmx/tdx.c    | 6 ++----
>  2 files changed, 2 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
> index 8f46a06e2c44..66454bead202 100644
> --- a/arch/x86/kvm/vmx/common.h
> +++ b/arch/x86/kvm/vmx/common.h
> @@ -53,8 +53,6 @@ struct vcpu_vt {
>  #ifdef CONFIG_X86_64
>  	u64		msr_host_kernel_gs_base;
>  #endif
> -
> -	unsigned long	host_debugctlmsr;
>  };
>  
>  #ifdef CONFIG_KVM_INTEL_TDX
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 7dbfad28debc..84b2922b8119 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -778,8 +778,6 @@ void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>  	else
>  		vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
>  
> -	vt->host_debugctlmsr = get_debugctlmsr();
> -
>  	vt->guest_state_loaded = true;
>  }
>  
> @@ -1056,8 +1054,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
>  
>  	tdx_vcpu_enter_exit(vcpu);
>  
> -	if (vt->host_debugctlmsr & ~TDX_DEBUGCTL_PRESERVED)
> -		update_debugctlmsr(vt->host_debugctlmsr);
> +	if (vcpu->arch.host_debugctl & ~TDX_DEBUGCTL_PRESERVED)
> +		update_debugctlmsr(vcpu->arch.host_debugctl);
>  
>  	tdx_load_host_xsave_state(vcpu);
>  	tdx->guest_entered = true;
> 
> base-commit: 475a02020ac2de6b10e85de75e79833139b556e0
> --
> 
Re: [PATCH v4 4/4] x86: KVM: VMX: preserve DEBUGCTLMSR_FREEZE_IN_SMM
Posted by Chao Gao 7 months ago
>@@ -7368,6 +7381,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
> 	if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
> 		set_debugreg(vcpu->arch.dr6, 6);
> 
>+	if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
>+		vmx_guest_debugctl_write(vcpu, vmx_guest_debugctl_read());

...

>+
> 	/*
> 	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
> 	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
>diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
>index 1b80479505d3..5ddedf73392b 100644
>--- a/arch/x86/kvm/vmx/vmx.h
>+++ b/arch/x86/kvm/vmx/vmx.h
>@@ -416,6 +416,8 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
> 
> void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
> u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
>+void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val);
>+u64 vmx_guest_debugctl_read(void);
> 
> /*
>  * Note, early Intel manuals have the write-low and read-high bitmap offsets
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 684b8047e0f2..a85078dfa36d 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -10752,7 +10752,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 		dm_request_for_irq_injection(vcpu) &&
> 		kvm_cpu_accept_dm_intr(vcpu);
> 	fastpath_t exit_fastpath;
>-	u64 run_flags;
>+	u64 run_flags, host_debug_ctl;
> 
> 	bool req_immediate_exit = false;
> 
>@@ -11024,7 +11024,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 		set_debugreg(0, 7);
> 	}
> 
>-	vcpu->arch.host_debugctl = get_debugctlmsr();
>+	host_debug_ctl = get_debugctlmsr();
>+	if (host_debug_ctl != vcpu->arch.host_debugctl)
>+		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
>+	vcpu->arch.host_debugctl = host_debug_ctl;

IIUC, using run_flags here may only update the GUEST_DEBUGCTL field of a vmcs02,
leaving vmcs01 not updated.

> 
> 	guest_timing_enter_irqoff();
> 
>-- 
>2.46.0
>
>
Re: [PATCH v4 4/4] x86: KVM: VMX: preserve DEBUGCTLMSR_FREEZE_IN_SMM
Posted by mlevitsk@redhat.com 7 months ago
On Fri, 2025-05-16 at 11:39 +0800, Chao Gao wrote:
> > @@ -7368,6 +7381,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
> > 	if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
> > 		set_debugreg(vcpu->arch.dr6, 6);
> > 
> > +	if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
> > +		vmx_guest_debugctl_write(vcpu, vmx_guest_debugctl_read());
> 
> ...
> 
> > +
> > 	/*
> > 	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
> > 	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
> > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> > index 1b80479505d3..5ddedf73392b 100644
> > --- a/arch/x86/kvm/vmx/vmx.h
> > +++ b/arch/x86/kvm/vmx/vmx.h
> > @@ -416,6 +416,8 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
> > 
> > void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
> > u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
> > +void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val);
> > +u64 vmx_guest_debugctl_read(void);
> > 
> > /*
> >  * Note, early Intel manuals have the write-low and read-high bitmap offsets
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 684b8047e0f2..a85078dfa36d 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -10752,7 +10752,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > 		dm_request_for_irq_injection(vcpu) &&
> > 		kvm_cpu_accept_dm_intr(vcpu);
> > 	fastpath_t exit_fastpath;
> > -	u64 run_flags;
> > +	u64 run_flags, host_debug_ctl;
> > 
> > 	bool req_immediate_exit = false;
> > 
> > @@ -11024,7 +11024,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > 		set_debugreg(0, 7);
> > 	}
> > 
> > -	vcpu->arch.host_debugctl = get_debugctlmsr();
> > +	host_debug_ctl = get_debugctlmsr();
> > +	if (host_debug_ctl != vcpu->arch.host_debugctl)
> > +		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
> > +	vcpu->arch.host_debugctl = host_debug_ctl;
> 
> IIUC, using run_flags here may only update the GUEST_DEBUGCTL field of a vmcs02,
> leaving vmcs01 not updated.

Hi,

Thankfully this should not be a problem because when KVM exits from vmcs02 to vmcs01 the
IA32_DEBUGCTL is reset to 0. 

If I’m not mistaken, this always happens regardless of the VM_ENTRY_LOAD_DEBUG_CONTROLS.

Now since I wrapped the write of the with a helper function (vmx_guest_debugctl_write),
it should pick up the new value.


Best regards,
	Maxim Levitsky

> 
> > 
> > 	guest_timing_enter_irqoff();
> > 
> > -- 
> > 2.46.0
> > 
> > 
>