[RFC PATCH 05/21] KVM: TDX: Enhance tdx_clear_page() to support huge pages

Yan Zhao posted 21 patches 7 months, 3 weeks ago
Only 20 patches received!
There is a newer version of this series
[RFC PATCH 05/21] KVM: TDX: Enhance tdx_clear_page() to support huge pages
Posted by Yan Zhao 7 months, 3 weeks ago
From: Xiaoyao Li <xiaoyao.li@intel.com>

KVM invokes tdx_clear_page() to zero pages using movdir64b().
Include level information to enable tdx_clear_page() to zero a huge page.

[Yan: split out, let tdx_clear_page() accept level]

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 arch/x86/kvm/vmx/tdx.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 03885cb2869b..1186085795ac 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -276,7 +276,7 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
 	vcpu->cpu = -1;
 }
 
-static void tdx_clear_page(struct page *page)
+static void __tdx_clear_page(struct page *page)
 {
 	const void *zero_page = (const void *) page_to_virt(ZERO_PAGE(0));
 	void *dest = page_to_virt(page);
@@ -295,6 +295,15 @@ static void tdx_clear_page(struct page *page)
 	__mb();
 }
 
+static void tdx_clear_page(struct page *page, int level)
+{
+	unsigned long nr = KVM_PAGES_PER_HPAGE(level);
+	unsigned long idx = 0;
+
+	while (nr--)
+		__tdx_clear_page(nth_page(page, idx++));
+}
+
 static void tdx_no_vcpus_enter_start(struct kvm *kvm)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
@@ -340,11 +349,10 @@ static int tdx_reclaim_page(struct page *page)
 
 	r = __tdx_reclaim_page(page);
 	if (!r)
-		tdx_clear_page(page);
+		tdx_clear_page(page, PG_LEVEL_4K);
 	return r;
 }
 
-
 /*
  * Reclaim the TD control page(s) which are crypto-protected by TDX guest's
  * private KeyID.  Assume the cache associated with the TDX private KeyID has
@@ -588,7 +596,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
 		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
 		return;
 	}
-	tdx_clear_page(kvm_tdx->td.tdr_page);
+	tdx_clear_page(kvm_tdx->td.tdr_page, PG_LEVEL_4K);
 
 	__free_page(kvm_tdx->td.tdr_page);
 	kvm_tdx->td.tdr_page = NULL;
@@ -1621,7 +1629,8 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
 		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
 		return -EIO;
 	}
-	tdx_clear_page(page);
+
+	tdx_clear_page(page, level);
 	tdx_unpin(kvm, page);
 	return 0;
 }
-- 
2.43.2
Re: [RFC PATCH 05/21] KVM: TDX: Enhance tdx_clear_page() to support huge pages
Posted by Edgecombe, Rick P 7 months ago
On Thu, 2025-04-24 at 11:05 +0800, Yan Zhao wrote:
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> 
> KVM invokes tdx_clear_page() to zero pages using movdir64b().
> Include level information to enable tdx_clear_page() to zero a huge page.
> 
> [Yan: split out, let tdx_clear_page() accept level]
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> ---
>  arch/x86/kvm/vmx/tdx.c | 19 ++++++++++++++-----
>  1 file changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 03885cb2869b..1186085795ac 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -276,7 +276,7 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
>  	vcpu->cpu = -1;
>  }
>  
> -static void tdx_clear_page(struct page *page)
> +static void __tdx_clear_page(struct page *page)
>  {
>  	const void *zero_page = (const void *) page_to_virt(ZERO_PAGE(0));
>  	void *dest = page_to_virt(page);
> @@ -295,6 +295,15 @@ static void tdx_clear_page(struct page *page)
>  	__mb();
>  }
>  
> +static void tdx_clear_page(struct page *page, int level)
> +{
> +	unsigned long nr = KVM_PAGES_PER_HPAGE(level);
> +	unsigned long idx = 0;
> +
> +	while (nr--)
> +		__tdx_clear_page(nth_page(page, idx++));

You shouldn't need both idx and nr.

> +}

Since tdx_clear_page() has a __mb(), it is probably worth checking that this
generates efficient code, considering the loops within loops pattern.

> +
>  static void tdx_no_vcpus_enter_start(struct kvm *kvm)
>  {
>  	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> @@ -340,11 +349,10 @@ static int tdx_reclaim_page(struct page *page)
>  
>  	r = __tdx_reclaim_page(page);
>  	if (!r)
> -		tdx_clear_page(page);
> +		tdx_clear_page(page, PG_LEVEL_4K);
>  	return r;
>  }
>  
> -
>  /*
>   * Reclaim the TD control page(s) which are crypto-protected by TDX guest's
>   * private KeyID.  Assume the cache associated with the TDX private KeyID has
> @@ -588,7 +596,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
>  		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
>  		return;
>  	}
> -	tdx_clear_page(kvm_tdx->td.tdr_page);
> +	tdx_clear_page(kvm_tdx->td.tdr_page, PG_LEVEL_4K);

Why not the __tdx_clear_page() variant? The patch adds it, but doesn't really
use it. Just implement it all in tdx_clear_page() then.

>  
>  	__free_page(kvm_tdx->td.tdr_page);
>  	kvm_tdx->td.tdr_page = NULL;
> @@ -1621,7 +1629,8 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
>  		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
>  		return -EIO;
>  	}
> -	tdx_clear_page(page);
> +
> +	tdx_clear_page(page, level);
>  	tdx_unpin(kvm, page);
>  	return 0;
>  }

Re: [RFC PATCH 05/21] KVM: TDX: Enhance tdx_clear_page() to support huge pages
Posted by Yan Zhao 7 months ago
On Wed, May 14, 2025 at 03:17:40AM +0800, Edgecombe, Rick P wrote:
> On Thu, 2025-04-24 at 11:05 +0800, Yan Zhao wrote:
> > From: Xiaoyao Li <xiaoyao.li@intel.com>
> > 
> > KVM invokes tdx_clear_page() to zero pages using movdir64b().
> > Include level information to enable tdx_clear_page() to zero a huge page.
> > 
> > [Yan: split out, let tdx_clear_page() accept level]
> > 
> > Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> > ---
> >  arch/x86/kvm/vmx/tdx.c | 19 ++++++++++++++-----
> >  1 file changed, 14 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 03885cb2869b..1186085795ac 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -276,7 +276,7 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
> >  	vcpu->cpu = -1;
> >  }
> >  
> > -static void tdx_clear_page(struct page *page)
> > +static void __tdx_clear_page(struct page *page)
> >  {
> >  	const void *zero_page = (const void *) page_to_virt(ZERO_PAGE(0));
> >  	void *dest = page_to_virt(page);
> > @@ -295,6 +295,15 @@ static void tdx_clear_page(struct page *page)
> >  	__mb();
> >  }
> >  
> > +static void tdx_clear_page(struct page *page, int level)
> > +{
> > +	unsigned long nr = KVM_PAGES_PER_HPAGE(level);
> > +	unsigned long idx = 0;
> > +
> > +	while (nr--)
> > +		__tdx_clear_page(nth_page(page, idx++));
> 
> You shouldn't need both idx and nr.
> 
> > +}
> 
> Since tdx_clear_page() has a __mb(), it is probably worth checking that this
> generates efficient code, considering the loops within loops pattern.
The concern makes sense!

Will convert the level to a size in bytes and use a single
"for (i = 0; i < size; i += 64)" loop for movdir64b().

> > +
> >  static void tdx_no_vcpus_enter_start(struct kvm *kvm)
> >  {
> >  	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > @@ -340,11 +349,10 @@ static int tdx_reclaim_page(struct page *page)
> >  
> >  	r = __tdx_reclaim_page(page);
> >  	if (!r)
> > -		tdx_clear_page(page);
> > +		tdx_clear_page(page, PG_LEVEL_4K);
> >  	return r;
> >  }
> >  
> > -
> >  /*
> >   * Reclaim the TD control page(s) which are crypto-protected by TDX guest's
> >   * private KeyID.  Assume the cache associated with the TDX private KeyID has
> > @@ -588,7 +596,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
> >  		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
> >  		return;
> >  	}
> > -	tdx_clear_page(kvm_tdx->td.tdr_page);
> > +	tdx_clear_page(kvm_tdx->td.tdr_page, PG_LEVEL_4K);
> 
> Why not the __tdx_clear_page() variant? The patch adds it, but doesn't really
> use it. Just implement it all in tdx_clear_page() then.
Ok.

> >  
> >  	__free_page(kvm_tdx->td.tdr_page);
> >  	kvm_tdx->td.tdr_page = NULL;
> > @@ -1621,7 +1629,8 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
> >  		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
> >  		return -EIO;
> >  	}
> > -	tdx_clear_page(page);
> > +
> > +	tdx_clear_page(page, level);
> >  	tdx_unpin(kvm, page);
> >  	return 0;
> >  }
>