[RFC, PATCH 09/12] KVM: TDX: Preallocate PAMT pages to be used in page fault path

Kirill A. Shutemov posted 12 patches 9 months, 1 week ago
There is a newer version of this series
[RFC, PATCH 09/12] KVM: TDX: Preallocate PAMT pages to be used in page fault path
Posted by Kirill A. Shutemov 9 months, 1 week ago
Preallocate pages to be used in the link_external_spt() and
set_external_spte() paths.

In the worst-case scenario, handling a page fault might require
tdx_nr_pamt_pages() pages for each page table level.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu/mmu.c          | 10 ++++++++++
 2 files changed, 12 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 91958c55f918..a5661499a176 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -849,6 +849,8 @@ struct kvm_vcpu_arch {
 	 */
 	struct kvm_mmu_memory_cache mmu_external_spt_cache;
 
+	struct kvm_mmu_memory_cache pamt_page_cache;
+
 	/*
 	 * QEMU userspace and the guest each have their own FPU state.
 	 * In vcpu_run, we switch between the user and guest FPU contexts.
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a284dce227a0..7bfa0dc50440 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -616,6 +616,15 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
 		if (r)
 			return r;
 	}
+
+	if (vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM) {
+		int nr = tdx_nr_pamt_pages(tdx_get_sysinfo());
+		r = kvm_mmu_topup_memory_cache(&vcpu->arch.pamt_page_cache,
+					       nr * PT64_ROOT_MAX_LEVEL);
+		if (r)
+			return r;
+	}
+
 	return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
 					  PT64_ROOT_MAX_LEVEL);
 }
@@ -626,6 +635,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_external_spt_cache);
+	kvm_mmu_free_memory_cache(&vcpu->arch.pamt_page_cache);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
 }
 
-- 
2.47.2
Re: [RFC, PATCH 09/12] KVM: TDX: Preallocate PAMT pages to be used in page fault path
Posted by Chao Gao 9 months ago
On Fri, May 02, 2025 at 04:08:25PM +0300, Kirill A. Shutemov wrote:
>Preallocate a page to be used in the link_external_spt() and
>set_external_spte() paths.
>
>In the worst-case scenario, handling a page fault might require a
>tdx_nr_pamt_pages() pages for each page table level.
>
>Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
>---
> arch/x86/include/asm/kvm_host.h |  2 ++
> arch/x86/kvm/mmu/mmu.c          | 10 ++++++++++
> 2 files changed, 12 insertions(+)
>
>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>index 91958c55f918..a5661499a176 100644
>--- a/arch/x86/include/asm/kvm_host.h
>+++ b/arch/x86/include/asm/kvm_host.h
>@@ -849,6 +849,8 @@ struct kvm_vcpu_arch {
> 	 */
> 	struct kvm_mmu_memory_cache mmu_external_spt_cache;
> 
>+	struct kvm_mmu_memory_cache pamt_page_cache;
>+
> 	/*
> 	 * QEMU userspace and the guest each have their own FPU state.
> 	 * In vcpu_run, we switch between the user and guest FPU contexts.
>diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>index a284dce227a0..7bfa0dc50440 100644
>--- a/arch/x86/kvm/mmu/mmu.c
>+++ b/arch/x86/kvm/mmu/mmu.c
>@@ -616,6 +616,15 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
> 		if (r)
> 			return r;
> 	}
>+
>+	if (vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM) {

The check for vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM is identical to
kvm_has_mirrored_tdp() a few lines above.

>+		int nr = tdx_nr_pamt_pages(tdx_get_sysinfo());

Since you're already accessing tdx_sysinfo, you can check if dynamic PAMT is
enabled and allocate the pamt page cache accordingly.

>+		r = kvm_mmu_topup_memory_cache(&vcpu->arch.pamt_page_cache,
>+					       nr * PT64_ROOT_MAX_LEVEL);
>+		if (r)
>+			return r;
>+	}
>+
> 	return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
> 					  PT64_ROOT_MAX_LEVEL);
> }
>@@ -626,6 +635,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
> 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
> 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
> 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_external_spt_cache);
>+	kvm_mmu_free_memory_cache(&vcpu->arch.pamt_page_cache);
> 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
> }
> 
>-- 
>2.47.2
>
Re: [RFC, PATCH 09/12] KVM: TDX: Preallocate PAMT pages to be used in page fault path
Posted by Kirill A. Shutemov 8 months, 2 weeks ago
On Wed, May 14, 2025 at 02:30:34PM +0800, Chao Gao wrote:
> On Fri, May 02, 2025 at 04:08:25PM +0300, Kirill A. Shutemov wrote:
> >Preallocate a page to be used in the link_external_spt() and
> >set_external_spte() paths.
> >
> >In the worst-case scenario, handling a page fault might require a
> >tdx_nr_pamt_pages() pages for each page table level.
> >
> >Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> >---
> > arch/x86/include/asm/kvm_host.h |  2 ++
> > arch/x86/kvm/mmu/mmu.c          | 10 ++++++++++
> > 2 files changed, 12 insertions(+)
> >
> >diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >index 91958c55f918..a5661499a176 100644
> >--- a/arch/x86/include/asm/kvm_host.h
> >+++ b/arch/x86/include/asm/kvm_host.h
> >@@ -849,6 +849,8 @@ struct kvm_vcpu_arch {
> > 	 */
> > 	struct kvm_mmu_memory_cache mmu_external_spt_cache;
> > 
> >+	struct kvm_mmu_memory_cache pamt_page_cache;
> >+
> > 	/*
> > 	 * QEMU userspace and the guest each have their own FPU state.
> > 	 * In vcpu_run, we switch between the user and guest FPU contexts.
> >diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> >index a284dce227a0..7bfa0dc50440 100644
> >--- a/arch/x86/kvm/mmu/mmu.c
> >+++ b/arch/x86/kvm/mmu/mmu.c
> >@@ -616,6 +616,15 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
> > 		if (r)
> > 			return r;
> > 	}
> >+
> >+	if (vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM) {
> 
> The check for vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM is identical to
> kvm_has_mirrored_tdp() a few lines above.

Well, yes. But I think it is conceptually different. There can be
different virtualization modes that have mirrored TDP but are not TDX.

> 
> >+		int nr = tdx_nr_pamt_pages(tdx_get_sysinfo());
> 
> Since you're already accessing tdx_sysinfo, you can check if dynamic PAMT is
> enabled and allocate the pamt page cache accordingly.

I will hide it in tdx_nr_pamt_pages() which would return 0 if Dynamic PAMT
is disabled.


-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [RFC, PATCH 09/12] KVM: TDX: Preallocate PAMT pages to be used in page fault path
Posted by Huang, Kai 9 months ago
On Fri, 2025-05-02 at 16:08 +0300, Kirill A. Shutemov wrote:
> Preallocate a page to be used in the link_external_spt() and
> set_external_spte() paths.
> 
> In the worst-case scenario, handling a page fault might require a
> tdx_nr_pamt_pages() pages for each page table level.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/mmu/mmu.c          | 10 ++++++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 91958c55f918..a5661499a176 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -849,6 +849,8 @@ struct kvm_vcpu_arch {
>  	 */
>  	struct kvm_mmu_memory_cache mmu_external_spt_cache;
>  
> +	struct kvm_mmu_memory_cache pamt_page_cache;
> +
>  	/*
>  	 * QEMU userspace and the guest each have their own FPU state.
>  	 * In vcpu_run, we switch between the user and guest FPU contexts.
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index a284dce227a0..7bfa0dc50440 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -616,6 +616,15 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
>  		if (r)
>  			return r;
>  	}
> +
> +	if (vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM) {
> +		int nr = tdx_nr_pamt_pages(tdx_get_sysinfo());
> +		r = kvm_mmu_topup_memory_cache(&vcpu->arch.pamt_page_cache,
> +					       nr * PT64_ROOT_MAX_LEVEL);
> +		if (r)
> +			return r;
> +	}
> +
>  	return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
>  					  PT64_ROOT_MAX_LEVEL);
>  }
> @@ -626,6 +635,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
>  	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
>  	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
>  	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_external_spt_cache);
> +	kvm_mmu_free_memory_cache(&vcpu->arch.pamt_page_cache);
>  	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
>  }
>  

IIUC, this patch can be avoided if we create an actual kmem_cache for
mmu_external_spt_cache with an actual 'ctor' where we simply call
tdx_alloc_page() as replied to the previous patch.