[RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock

Yan Zhao posted 21 patches 7 months, 3 weeks ago
Only 20 patches received!
There is a newer version of this series
[RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Yan Zhao 7 months, 3 weeks ago
Introduce the split_external_spt hook and call it within tdp_mmu_set_spte()
for the mirror page table when kvm->mmu_lock is held for writing.

When tdp_mmu_set_spte() is invoked to transition an old leaf SPTE to a new
non-leaf SPTE in the mirror page table, use the split_external_spt hook to
propagate the entry splitting request to the external page table.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  4 ++++
 arch/x86/kvm/mmu/tdp_mmu.c         | 26 ++++++++++++++++++++------
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 79406bf07a1c..f8403e0f6c1e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP_OPTIONAL(link_external_spt)
 KVM_X86_OP_OPTIONAL(set_external_spte)
 KVM_X86_OP_OPTIONAL(free_external_spt)
 KVM_X86_OP_OPTIONAL(remove_external_spte)
+KVM_X86_OP_OPTIONAL(split_external_spt)
 KVM_X86_OP(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f96d30ad4ae8..6962a8a424ef 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1812,6 +1812,10 @@ struct kvm_x86_ops {
 	int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 				    kvm_pfn_t pfn_for_gfn);
 
+	/* Split the external page table into smaller page tables */
+	int (*split_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+				  void *external_spt);
+
 	bool (*has_wbinvd_exit)(void);
 
 	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 799a08f91bf9..0f683753a7bb 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -325,6 +325,7 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 				bool shared);
 
 static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(bool mirror);
+static void *get_external_spt(gfn_t gfn, u64 new_spte, int level);
 
 static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
@@ -384,6 +385,19 @@ static void remove_external_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte,
 	KVM_BUG_ON(ret, kvm);
 }
 
+static int split_external_spt(struct kvm *kvm, gfn_t gfn, u64 old_spte,
+			      u64 new_spte, int level)
+{
+	void *external_spt = get_external_spt(gfn, new_spte, level);
+	int ret;
+
+	KVM_BUG_ON(!external_spt, kvm);
+
+	ret = static_call(kvm_x86_split_external_spt)(kvm, gfn, level, external_spt);
+	KVM_BUG_ON(ret, kvm);
+
+	return ret;
+}
 /**
  * handle_removed_pt() - handle a page table removed from the TDP structure
  *
@@ -764,13 +778,13 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
 
 	handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
 
-	/*
-	 * Users that do non-atomic setting of PTEs don't operate on mirror
-	 * roots, so don't handle it and bug the VM if it's seen.
-	 */
 	if (is_mirror_sptep(sptep)) {
-		KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
-		remove_external_spte(kvm, gfn, old_spte, level);
+		if (!is_shadow_present_pte(new_spte))
+			remove_external_spte(kvm, gfn, old_spte, level);
+		else if (is_last_spte(old_spte, level) && !is_last_spte(new_spte, level))
+			split_external_spt(kvm, gfn, old_spte, new_spte, level);
+		else
+			KVM_BUG_ON(1, kvm);
 	}
 
 	return old_spte;
-- 
2.43.2
Re: [RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Binbin Wu 6 months, 4 weeks ago

On 4/24/2025 11:07 AM, Yan Zhao wrote:
[...]
>   
> +static int split_external_spt(struct kvm *kvm, gfn_t gfn, u64 old_spte,
> +			      u64 new_spte, int level)
> +{
> +	void *external_spt = get_external_spt(gfn, new_spte, level);
> +	int ret;
> +
> +	KVM_BUG_ON(!external_spt, kvm);
> +
> +	ret = static_call(kvm_x86_split_external_spt)(kvm, gfn, level, external_spt);
Better to use kvm_x86_call() instead of static_call().

> +	KVM_BUG_ON(ret, kvm);
> +
> +	return ret;
> +}
>   /**
>    * handle_removed_pt() - handle a page table removed from the TDP structure
>    *
> @@ -764,13 +778,13 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
>   
>   	handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
>   
> -	/*
> -	 * Users that do non-atomic setting of PTEs don't operate on mirror
> -	 * roots, so don't handle it and bug the VM if it's seen.
> -	 */
>   	if (is_mirror_sptep(sptep)) {
> -		KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
> -		remove_external_spte(kvm, gfn, old_spte, level);
> +		if (!is_shadow_present_pte(new_spte))
> +			remove_external_spte(kvm, gfn, old_spte, level);
> +		else if (is_last_spte(old_spte, level) && !is_last_spte(new_spte, level))
> +			split_external_spt(kvm, gfn, old_spte, new_spte, level);
> +		else
> +			KVM_BUG_ON(1, kvm);
>   	}
>   
>   	return old_spte;
Re: [RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Yan Zhao 6 months, 4 weeks ago
On Tue, May 20, 2025 at 01:40:46PM +0800, Binbin Wu wrote:
> 
> 
> On 4/24/2025 11:07 AM, Yan Zhao wrote:
> [...]
> > +static int split_external_spt(struct kvm *kvm, gfn_t gfn, u64 old_spte,
> > +			      u64 new_spte, int level)
> > +{
> > +	void *external_spt = get_external_spt(gfn, new_spte, level);
> > +	int ret;
> > +
> > +	KVM_BUG_ON(!external_spt, kvm);
> > +
> > +	ret = static_call(kvm_x86_split_external_spt)(kvm, gfn, level, external_spt);
> Better to use kvm_x86_call() instead of static_call().
Will do. Thanks!

> > +	KVM_BUG_ON(ret, kvm);
> > +
> > +	return ret;
> > +}
> >   /**
> >    * handle_removed_pt() - handle a page table removed from the TDP structure
> >    *
> > @@ -764,13 +778,13 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
> >   	handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
> > -	/*
> > -	 * Users that do non-atomic setting of PTEs don't operate on mirror
> > -	 * roots, so don't handle it and bug the VM if it's seen.
> > -	 */
> >   	if (is_mirror_sptep(sptep)) {
> > -		KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
> > -		remove_external_spte(kvm, gfn, old_spte, level);
> > +		if (!is_shadow_present_pte(new_spte))
> > +			remove_external_spte(kvm, gfn, old_spte, level);
> > +		else if (is_last_spte(old_spte, level) && !is_last_spte(new_spte, level))
> > +			split_external_spt(kvm, gfn, old_spte, new_spte, level);
> > +		else
> > +			KVM_BUG_ON(1, kvm);
> >   	}
> >   	return old_spte;
>
Re: [RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Edgecombe, Rick P 7 months ago
On Thu, 2025-04-24 at 11:07 +0800, Yan Zhao wrote:
> +static int split_external_spt(struct kvm *kvm, gfn_t gfn, u64 old_spte,
> +			      u64 new_spte, int level)
> +{
> +	void *external_spt = get_external_spt(gfn, new_spte, level);
> +	int ret;
> +
> +	KVM_BUG_ON(!external_spt, kvm);
> +
> +	ret = static_call(kvm_x86_split_external_spt)(kvm, gfn, level, external_spt);
> +	KVM_BUG_ON(ret, kvm);

Shouldn't this BUG_ON be handled in the split_external_spt implementation? I
don't think we need another one.

> +
> +	return ret;
> +}
>  /**
>   * handle_removed_pt() - handle a page table removed from the TDP structure
>   *
> @@ -764,13 +778,13 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
>  
>  	handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
>  
> -	/*
> -	 * Users that do non-atomic setting of PTEs don't operate on mirror
> -	 * roots, so don't handle it and bug the VM if it's seen.
> -	 */
>  	if (is_mirror_sptep(sptep)) {
> -		KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
> -		remove_external_spte(kvm, gfn, old_spte, level);
> +		if (!is_shadow_present_pte(new_spte))
> +			remove_external_spte(kvm, gfn, old_spte, level);
> +		else if (is_last_spte(old_spte, level) && !is_last_spte(new_spte, level))
> +			split_external_spt(kvm, gfn, old_spte, new_spte, level);
> +		else
> +			KVM_BUG_ON(1, kvm);

It might be worth adding a comment on what this is checking for at this point. I
think it's that external EPT only supports certain operations, so bug the VM if
any unsupported operations are seen.

>  	}
>  
>  	return old_spte;

Re: [RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Yan Zhao 7 months ago
On Wed, May 14, 2025 at 07:06:48AM +0800, Edgecombe, Rick P wrote:
> On Thu, 2025-04-24 at 11:07 +0800, Yan Zhao wrote:
> > +static int split_external_spt(struct kvm *kvm, gfn_t gfn, u64 old_spte,
> > +			      u64 new_spte, int level)
> > +{
> > +	void *external_spt = get_external_spt(gfn, new_spte, level);
> > +	int ret;
> > +
> > +	KVM_BUG_ON(!external_spt, kvm);
> > +
> > +	ret = static_call(kvm_x86_split_external_spt)(kvm, gfn, level, external_spt);
> > +	KVM_BUG_ON(ret, kvm);
> 
> Shouldn't this BUG_ON be handled in the split_external_spt implementation? I
> don't think we need another one.
Ok. But kvm_x86_split_external_spt() is not for TDX only.
Is it good for the KVM MMU core to rely on each implementation to trigger the
BUG_ON?

> > +	return ret;
> > +}
> >  /**
> >   * handle_removed_pt() - handle a page table removed from the TDP structure
> >   *
> > @@ -764,13 +778,13 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
> >  
> >  	handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
> >  
> > -	/*
> > -	 * Users that do non-atomic setting of PTEs don't operate on mirror
> > -	 * roots, so don't handle it and bug the VM if it's seen.
> > -	 */
> >  	if (is_mirror_sptep(sptep)) {
> > -		KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
> > -		remove_external_spte(kvm, gfn, old_spte, level);
> > +		if (!is_shadow_present_pte(new_spte))
> > +			remove_external_spte(kvm, gfn, old_spte, level);
> > +		else if (is_last_spte(old_spte, level) && !is_last_spte(new_spte, level))
> > +			split_external_spt(kvm, gfn, old_spte, new_spte, level);
> > +		else
> > +			KVM_BUG_ON(1, kvm);
> 
> It might be worth a comment what this is looking for at this point. I think it's
> that external EPT only support certain operations, so bug if any unsupported
> operations are seen.
Will do.

> >  	}
> >  
> >  	return old_spte;
> 
Re: [RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Edgecombe, Rick P 7 months ago
On Fri, 2025-05-16 at 17:17 +0800, Yan Zhao wrote:
> > Shouldn't this BUG_ON be handled in the split_external_spt implementation? I
> > don't think we need another one.
> Ok. But kvm_x86_split_external_spt() is not for TDX only.
> Is it good for KVM MMU core to rely on each implementation to trigger BUG_ON?

It effectively is for TDX only. At least for the foreseeable future. The naming
basically means that people don't have to see "TDX" everywhere when they look in
the MMU code.

> 
> > 
Re: [RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Yan Zhao 7 months ago
On Sat, May 17, 2025 at 06:11:59AM +0800, Edgecombe, Rick P wrote:
> On Fri, 2025-05-16 at 17:17 +0800, Yan Zhao wrote:
> > > Shouldn't this BUG_ON be handled in the split_external_spt implementation? I
> > > don't think we need another one.
> > Ok. But kvm_x86_split_external_spt() is not for TDX only.
> > Is it good for KVM MMU core to rely on each implementation to trigger BUG_ON?
> 
> It effectively is for TDX only. At least for the foreseeable future. The naming
> basically means that people don't have to see "TDX" everywhere when they look in
> the MMU code.
Hmm, another reason to add the BUG_ON is to align it with remove_external_spte().
There's also a KVM_BUG_ON() following the remove_external_spte hook.

I interpret this as error handling in the KVM MMU core: the wrapper returns
"void" and so cannot propagate the error, hence it issues a BUG_ON if ret != 0.
Re: [RFC PATCH 14/21] KVM: x86/tdp_mmu: Invoke split_external_spt hook with exclusive mmu_lock
Posted by Edgecombe, Rick P 7 months ago
On Mon, 2025-05-19 at 12:01 +0800, Yan Zhao wrote:
> On Sat, May 17, 2025 at 06:11:59AM +0800, Edgecombe, Rick P wrote:
> > On Fri, 2025-05-16 at 17:17 +0800, Yan Zhao wrote:
> > > > Shouldn't this BUG_ON be handled in the split_external_spt implementation? I
> > > > don't think we need another one.
> > > Ok. But kvm_x86_split_external_spt() is not for TDX only.
> > > Is it good for KVM MMU core to rely on each implementation to trigger BUG_ON?
> > 
> > It effectively is for TDX only. At least for the foreseeable future. The naming
> > basically means that people don't have to see "TDX" everywhere when they look in
> > the MMU code.
> Hmm, another reason to add the BUG_ON is to align it with remove_external_spte().
> There's also a KVM_BUG_ON() following the remove_external_spte hook.
> 
> I interpret this as error handling in the KVM MMU core, which returns "void",
> so issuing BUG_ON if ret != 0.

This is related to the other thread about how to handle demote failure. Let's
continue there.

But in general, the amount of KVM_BUG_ON()s we have for mirror EPT is a bit of a
code smell. It's not exclusive to this series, but I'd love it if we could keep
it from getting worse.