From: Sean Christopherson <seanjc@google.com>
Remove the completely unnecessary assumptions that memory unmapped from a
TDX guest is backed by refcounted struct page memory.
APIs tdh_phymem_page_wbinvd_hkid(), tdx_quirk_reset_page() are used when
unmapping guest private memory from S-EPT. Since mapping of guest private
memory places no requirements on how KVM and guest_memfd manage memory,
neither does guest private memory unmapping.
Rip out the misguided struct page assumptions/constraints by having the two
APIs take PFN directly. This ensures that for future huge page support in
S-EPT, the kernel doesn't pick up even worse assumptions like "a hugepage
must be contained in a single folio".
Use "kvm_pfn_t pfn" for type safety. Using this KVM type is appropriate
since APIs tdh_phymem_page_wbinvd_hkid() and tdx_quirk_reset_page() are
exported to KVM only.
Update mk_keyed_paddr(), which is invoked by tdh_phymem_page_wbinvd_hkid(),
to take PFN as parameter accordingly. Opportunistically, move
mk_keyed_paddr() from tdx.h to tdx.c since there are no external users.
Have tdx_reclaim_page() remain using struct page as parameter since it's
currently not used for removing guest private memory yet.
[Yan: Use kvm_pfn_t, drop reclaim API param update, move mk_keyed_paddr()]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
arch/x86/include/asm/tdx.h | 15 ++-------------
arch/x86/kvm/vmx/tdx.c | 10 +++++-----
arch/x86/virt/vmx/tdx/tdx.c | 16 +++++++++++-----
3 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index f3f0b1872176..6ceb4cd9ff21 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -153,7 +153,7 @@ int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
-void tdx_quirk_reset_page(struct page *page);
+void tdx_quirk_reset_page(kvm_pfn_t pfn);
struct tdx_td {
/* TD root structure: */
@@ -177,17 +177,6 @@ struct tdx_vp {
struct page **tdcx_pages;
};
-static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
-{
- u64 ret;
-
- ret = page_to_phys(page);
- /* KeyID bits are just above the physical address bits: */
- ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
-
- return ret;
-}
-
static inline int pg_level_to_tdx_sept_level(enum pg_level level)
{
WARN_ON_ONCE(level == PG_LEVEL_NONE);
@@ -219,7 +208,7 @@ u64 tdh_mem_track(struct tdx_td *tdr);
u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2);
u64 tdh_phymem_cache_wb(bool resume);
u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn);
#else
static inline void tdx_init(void) { }
static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 1f1abc5b5655..75ad3debcd84 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -343,7 +343,7 @@ static int tdx_reclaim_page(struct page *page)
r = __tdx_reclaim_page(page);
if (!r)
- tdx_quirk_reset_page(page);
+ tdx_quirk_reset_page(page_to_pfn(page));
return r;
}
@@ -597,7 +597,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
return;
- tdx_quirk_reset_page(kvm_tdx->td.tdr_page);
+ tdx_quirk_reset_page(page_to_pfn(kvm_tdx->td.tdr_page));
__free_page(kvm_tdx->td.tdr_page);
kvm_tdx->td.tdr_page = NULL;
@@ -1776,9 +1776,9 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
enum pg_level level, u64 mirror_spte)
{
- struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
int tdx_level = pg_level_to_tdx_sept_level(level);
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+ kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
gpa_t gpa = gfn_to_gpa(gfn);
u64 err, entry, level_state;
@@ -1817,11 +1817,11 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
return;
- err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
+ err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn);
if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
return;
- tdx_quirk_reset_page(page);
+ tdx_quirk_reset_page(pfn);
}
void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index a9dd75190c67..2f9d07ad1a9a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -730,9 +730,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
mb();
}
-void tdx_quirk_reset_page(struct page *page)
+void tdx_quirk_reset_page(kvm_pfn_t pfn)
{
- tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE);
+ tdx_quirk_reset_paddr(PFN_PHYS(pfn), PAGE_SIZE);
}
EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page);
@@ -1907,21 +1907,27 @@ u64 tdh_phymem_cache_wb(bool resume)
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_cache_wb);
+static inline u64 mk_keyed_paddr(u16 hkid, kvm_pfn_t pfn)
+{
+ /* KeyID bits are just above the physical address bits. */
+ return PFN_PHYS(pfn) | ((u64)hkid << boot_cpu_data.x86_phys_bits);
+}
+
u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
{
struct tdx_module_args args = {};
- args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
+ args.rcx = mk_keyed_paddr(tdx_global_keyid, page_to_pfn(td->tdr_page));
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr);
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn)
{
struct tdx_module_args args = {};
- args.rcx = mk_keyed_paddr(hkid, page);
+ args.rcx = mk_keyed_paddr(hkid, pfn);
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
--
2.43.2
On Thu, Mar 19, 2026 at 08:58:08AM +0800, Yan Zhao wrote: > @@ -1817,11 +1817,11 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn, > if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm)) > return; > > - err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page); > + err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn); > if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm)) > return; > > - tdx_quirk_reset_page(page); > + tdx_quirk_reset_page(pfn); > } > > void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, The same problem. @level is ignored. -- Kiryl Shutsemau / Kirill A. Shutemov
On 3/19/2026 8:58 AM, Yan Zhao wrote:
> From: Sean Christopherson <seanjc@google.com>
>
> Remove the completely unnecessary assumptions that memory unmapped from a
> TDX guest is backed by refcounted struct page memory.
>
> APIs tdh_phymem_page_wbinvd_hkid(), tdx_quirk_reset_page() are used when
> unmapping guest private memory from S-EPT. Since mapping of guest private
> memory places no requirements on how KVM and guest_memfd manage memory,
> neither does guest private memory unmapping.
>
> Rip out the misguided struct page assumptions/constraints by having the two
> APIs take PFN directly. This ensures that for future huge page support in
> S-EPT, the kernel doesn't pick up even worse assumptions like "a hugepage
> must be contained in a single folio".
>
> Use "kvm_pfn_t pfn" for type safety. Using this KVM type is appropriate
> since APIs tdh_phymem_page_wbinvd_hkid() and tdx_quirk_reset_page() are
> exported to KVM only.
>
> Update mk_keyed_paddr(), which is invoked by tdh_phymem_page_wbinvd_hkid(),
> to take PFN as parameter accordingly. Opportunistically, move
> mk_keyed_paddr() from tdx.h to tdx.c since there are no external users.
>
> Have tdx_reclaim_page() remain using struct page as parameter since it's
> currently not used for removing guest private memory yet.
>
> [Yan: Use kvm_pfn_t, drop reclaim API param update, move mk_keyed_paddr()]
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> ---
> arch/x86/include/asm/tdx.h | 15 ++-------------
> arch/x86/kvm/vmx/tdx.c | 10 +++++-----
> arch/x86/virt/vmx/tdx/tdx.c | 16 +++++++++++-----
> 3 files changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index f3f0b1872176..6ceb4cd9ff21 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -153,7 +153,7 @@ int tdx_guest_keyid_alloc(void);
> u32 tdx_get_nr_guest_keyids(void);
> void tdx_guest_keyid_free(unsigned int keyid);
>
> -void tdx_quirk_reset_page(struct page *page);
> +void tdx_quirk_reset_page(kvm_pfn_t pfn);
>
> struct tdx_td {
> /* TD root structure: */
> @@ -177,17 +177,6 @@ struct tdx_vp {
> struct page **tdcx_pages;
> };
>
> -static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
> -{
> - u64 ret;
> -
> - ret = page_to_phys(page);
> - /* KeyID bits are just above the physical address bits: */
> - ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
> -
> - return ret;
> -}
> -
> static inline int pg_level_to_tdx_sept_level(enum pg_level level)
> {
> WARN_ON_ONCE(level == PG_LEVEL_NONE);
> @@ -219,7 +208,7 @@ u64 tdh_mem_track(struct tdx_td *tdr);
> u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2);
> u64 tdh_phymem_cache_wb(bool resume);
> u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
> -u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
> +u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn);
> #else
> static inline void tdx_init(void) { }
> static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 1f1abc5b5655..75ad3debcd84 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -343,7 +343,7 @@ static int tdx_reclaim_page(struct page *page)
>
> r = __tdx_reclaim_page(page);
> if (!r)
> - tdx_quirk_reset_page(page);
> + tdx_quirk_reset_page(page_to_pfn(page));
> return r;
> }
>
> @@ -597,7 +597,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
> if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
> return;
>
> - tdx_quirk_reset_page(kvm_tdx->td.tdr_page);
> + tdx_quirk_reset_page(page_to_pfn(kvm_tdx->td.tdr_page));
>
> __free_page(kvm_tdx->td.tdr_page);
> kvm_tdx->td.tdr_page = NULL;
> @@ -1776,9 +1776,9 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
> static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> enum pg_level level, u64 mirror_spte)
> {
> - struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
> int tdx_level = pg_level_to_tdx_sept_level(level);
> struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> + kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
> gpa_t gpa = gfn_to_gpa(gfn);
> u64 err, entry, level_state;
>
> @@ -1817,11 +1817,11 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
> return;
>
> - err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
> + err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn);
> if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
> return;
>
> - tdx_quirk_reset_page(page);
> + tdx_quirk_reset_page(pfn);
> }
>
> void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index a9dd75190c67..2f9d07ad1a9a 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -730,9 +730,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
> mb();
> }
>
> -void tdx_quirk_reset_page(struct page *page)
> +void tdx_quirk_reset_page(kvm_pfn_t pfn)
So why keep the function tdx_quirk_reset_page() but expect passing in
the kvm_pfn_t? It looks werid that the name indicates to reset a page
but what gets passed in is a pfn.
I think we have 2 options:
1. Drop helper tdx_quirk_reset_page() and use tdx_quirk_reset_paddr()
directly.
2. keep tdx_quirk_reset_page() as-is for the cases of tdx_reclaim_page()
and tdx_reclaim_td_control_pages() that have the struct page. But only
change tdx_sept_remove_private_spte() to use tdx_quirk_reset_paddr()
directly.
On Thu, Mar 19, 2026 at 11:20:48AM +0800, Xiaoyao Li wrote:
> On 3/19/2026 8:58 AM, Yan Zhao wrote:
> > From: Sean Christopherson <seanjc@google.com>
> >
> > Remove the completely unnecessary assumptions that memory unmapped from a
> > TDX guest is backed by refcounted struct page memory.
> >
> > APIs tdh_phymem_page_wbinvd_hkid(), tdx_quirk_reset_page() are used when
> > unmapping guest private memory from S-EPT. Since mapping of guest private
> > memory places no requirements on how KVM and guest_memfd manage memory,
> > neither does guest private memory unmapping.
> >
> > Rip out the misguided struct page assumptions/constraints by having the two
> > APIs take PFN directly. This ensures that for future huge page support in
> > S-EPT, the kernel doesn't pick up even worse assumptions like "a hugepage
> > must be contained in a single folio".
> >
> > Use "kvm_pfn_t pfn" for type safety. Using this KVM type is appropriate
> > since APIs tdh_phymem_page_wbinvd_hkid() and tdx_quirk_reset_page() are
> > exported to KVM only.
> >
> > Update mk_keyed_paddr(), which is invoked by tdh_phymem_page_wbinvd_hkid(),
> > to take PFN as parameter accordingly. Opportunistically, move
> > mk_keyed_paddr() from tdx.h to tdx.c since there are no external users.
> >
> > Have tdx_reclaim_page() remain using struct page as parameter since it's
> > currently not used for removing guest private memory yet.
> >
> > [Yan: Use kvm_pfn_t, drop reclaim API param update, move mk_keyed_paddr()]
> >
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> > ---
> > arch/x86/include/asm/tdx.h | 15 ++-------------
> > arch/x86/kvm/vmx/tdx.c | 10 +++++-----
> > arch/x86/virt/vmx/tdx/tdx.c | 16 +++++++++++-----
> > 3 files changed, 18 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> > index f3f0b1872176..6ceb4cd9ff21 100644
> > --- a/arch/x86/include/asm/tdx.h
> > +++ b/arch/x86/include/asm/tdx.h
> > @@ -153,7 +153,7 @@ int tdx_guest_keyid_alloc(void);
> > u32 tdx_get_nr_guest_keyids(void);
> > void tdx_guest_keyid_free(unsigned int keyid);
> > -void tdx_quirk_reset_page(struct page *page);
> > +void tdx_quirk_reset_page(kvm_pfn_t pfn);
> > struct tdx_td {
> > /* TD root structure: */
> > @@ -177,17 +177,6 @@ struct tdx_vp {
> > struct page **tdcx_pages;
> > };
> > -static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
> > -{
> > - u64 ret;
> > -
> > - ret = page_to_phys(page);
> > - /* KeyID bits are just above the physical address bits: */
> > - ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
> > -
> > - return ret;
> > -}
> > -
> > static inline int pg_level_to_tdx_sept_level(enum pg_level level)
> > {
> > WARN_ON_ONCE(level == PG_LEVEL_NONE);
> > @@ -219,7 +208,7 @@ u64 tdh_mem_track(struct tdx_td *tdr);
> > u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2);
> > u64 tdh_phymem_cache_wb(bool resume);
> > u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
> > -u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
> > +u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn);
> > #else
> > static inline void tdx_init(void) { }
> > static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 1f1abc5b5655..75ad3debcd84 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -343,7 +343,7 @@ static int tdx_reclaim_page(struct page *page)
> > r = __tdx_reclaim_page(page);
> > if (!r)
> > - tdx_quirk_reset_page(page);
> > + tdx_quirk_reset_page(page_to_pfn(page));
> > return r;
> > }
> > @@ -597,7 +597,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
> > if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
> > return;
> > - tdx_quirk_reset_page(kvm_tdx->td.tdr_page);
> > + tdx_quirk_reset_page(page_to_pfn(kvm_tdx->td.tdr_page));
> > __free_page(kvm_tdx->td.tdr_page);
> > kvm_tdx->td.tdr_page = NULL;
> > @@ -1776,9 +1776,9 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
> > static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> > enum pg_level level, u64 mirror_spte)
> > {
> > - struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
> > int tdx_level = pg_level_to_tdx_sept_level(level);
> > struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > + kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
> > gpa_t gpa = gfn_to_gpa(gfn);
> > u64 err, entry, level_state;
> > @@ -1817,11 +1817,11 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> > if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
> > return;
> > - err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
> > + err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn);
> > if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
> > return;
> > - tdx_quirk_reset_page(page);
> > + tdx_quirk_reset_page(pfn);
> > }
> > void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
> > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> > index a9dd75190c67..2f9d07ad1a9a 100644
> > --- a/arch/x86/virt/vmx/tdx/tdx.c
> > +++ b/arch/x86/virt/vmx/tdx/tdx.c
> > @@ -730,9 +730,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
> > mb();
> > }
> > -void tdx_quirk_reset_page(struct page *page)
> > +void tdx_quirk_reset_page(kvm_pfn_t pfn)
>
> So why keep the function tdx_quirk_reset_page() but expect passing in the
> kvm_pfn_t? It looks werid that the name indicates to reset a page but what
> gets passed in is a pfn.
I thought about introducing tdx_quirk_reset_pfn(). But considering
tdx_quirk_reset_pfn() has to be an exported API, I'm reluctant to do that.
Given that even with tdx_quirk_reset_pfn(), it still expects TDX convertible
RAM, I think having tdx_quirk_reset_page() to take pfn is still acceptable.
We just don't want KVM to do pfn --> struct page --> pfn conversions.
On 3/19/2026 2:45 PM, Yan Zhao wrote:
> On Thu, Mar 19, 2026 at 11:20:48AM +0800, Xiaoyao Li wrote:
>> On 3/19/2026 8:58 AM, Yan Zhao wrote:
>>> From: Sean Christopherson <seanjc@google.com>
[...]
>>> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
>>> index a9dd75190c67..2f9d07ad1a9a 100644
>>> --- a/arch/x86/virt/vmx/tdx/tdx.c
>>> +++ b/arch/x86/virt/vmx/tdx/tdx.c
>>> @@ -730,9 +730,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
>>> mb();
>>> }
>>> -void tdx_quirk_reset_page(struct page *page)
>>> +void tdx_quirk_reset_page(kvm_pfn_t pfn)
>>
>> So why keep the function tdx_quirk_reset_page() but expect passing in the
>> kvm_pfn_t? It looks werid that the name indicates to reset a page but what
>> gets passed in is a pfn.
> I thought about introducing tdx_quirk_reset_pfn(). But considering
> tdx_quirk_reset_pfn() has to be an exported API, I'm reluctant to do that.
>
> Given that even with tdx_quirk_reset_pfn(), it still expects TDX convertible
> RAM, I think having tdx_quirk_reset_page() to take pfn is still acceptable.
>
> We just don't want KVM to do pfn --> struct page --> pfn conversions.
Only tdx_sept_remove_private_spte() is doing such conversions. While
tdx_reclaim_page() and tdx_reclaim_td_control_pages() already have the
struct page natively.
So why not considering option 2?
2. keep tdx_quirk_reset_page() as-is for the cases of
tdx_reclaim_page() and tdx_reclaim_td_control_pages() that have the
struct page. But only change tdx_sept_remove_private_spte() to use
tdx_quirk_reset_paddr() directly.
It will need export tdx_quirk_reset_paddr() for KVM. I think it will be OK?
On Thu, 2026-03-19 at 16:56 +0800, Xiaoyao Li wrote: > > > > } > > > > -void tdx_quirk_reset_page(struct page *page) > > > > +void tdx_quirk_reset_page(kvm_pfn_t pfn) > > > > > > So why keep the function tdx_quirk_reset_page() but expect passing in the > > > kvm_pfn_t? It looks werid that the name indicates to reset a page but what > > > gets passed in is a pfn. The kernel has APIs that take non-struct page arg forms and operate on a "page". For example free_page(), clear_page(), etc. So keeping the "_page" name seems not too horrible to me. > > I thought about introducing tdx_quirk_reset_pfn(). But considering > > tdx_quirk_reset_pfn() has to be an exported API, I'm reluctant to do that. Yea exporting two functions that do the same thing doesn't seem the right balance. > > > > Given that even with tdx_quirk_reset_pfn(), it still expects TDX convertible > > RAM, I think having tdx_quirk_reset_page() to take pfn is still acceptable. > > > > We just don't want KVM to do pfn --> struct page --> pfn conversions. We can assume struct pages have pfn's pretty safely. So pfn->page, and especially allocated from far away code, is the cleanup target here. > > Only tdx_sept_remove_private_spte() is doing such conversions. While > tdx_reclaim_page() and tdx_reclaim_td_control_pages() already have the > struct page natively. > > So why not considering option 2? > > 2. keep tdx_quirk_reset_page() as-is for the cases of > tdx_reclaim_page() and tdx_reclaim_td_control_pages() that have the > struct page. But only change tdx_sept_remove_private_spte() to use > tdx_quirk_reset_paddr() directly. > > It will need export tdx_quirk_reset_paddr() for KVM. I think it will be OK? Exporting tdx_quirk_reset_paddr() seems reasonable, except then we have pfn, PA and struct page across the API. It increases the asymmetry. We did discuss converting the whole API over to PFN for symmetry. It could eliminate the control page and guest memory differences. But this way seems like a more manageable step that addresses the biggest issue. If we don't want to do a massive cleanup, there will be some stuff left for the future.
On Thu, Mar 19, 2026 at 04:56:10PM +0800, Xiaoyao Li wrote: > On 3/19/2026 2:45 PM, Yan Zhao wrote: > > On Thu, Mar 19, 2026 at 11:20:48AM +0800, Xiaoyao Li wrote: > > > On 3/19/2026 8:58 AM, Yan Zhao wrote: > > > > From: Sean Christopherson <seanjc@google.com> > [...] > > > > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c > > > > index a9dd75190c67..2f9d07ad1a9a 100644 > > > > --- a/arch/x86/virt/vmx/tdx/tdx.c > > > > +++ b/arch/x86/virt/vmx/tdx/tdx.c > > > > @@ -730,9 +730,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size) > > > > mb(); > > > > } > > > > -void tdx_quirk_reset_page(struct page *page) > > > > +void tdx_quirk_reset_page(kvm_pfn_t pfn) > > > > > > So why keep the function tdx_quirk_reset_page() but expect passing in the > > > kvm_pfn_t? It looks werid that the name indicates to reset a page but what > > > gets passed in is a pfn. > > I thought about introducing tdx_quirk_reset_pfn(). But considering > > tdx_quirk_reset_pfn() has to be an exported API, I'm reluctant to do that. > > > > Given that even with tdx_quirk_reset_pfn(), it still expects TDX convertible > > RAM, I think having tdx_quirk_reset_page() to take pfn is still acceptable. > > > > We just don't want KVM to do pfn --> struct page --> pfn conversions. > > Only tdx_sept_remove_private_spte() is doing such conversions. While > tdx_reclaim_page() and tdx_reclaim_td_control_pages() already have the > struct page natively. Unlike requiring KVM to call pfn_to_page() before invoking guest private memory related APIs, Having tdx_reclaim_page() and tdx_reclaim_td_control_pages() to call page_to_pfn() does not impose unnecessary assumptions of how KVM allocates memory. So, I think it's fine for them to invoke tdx_quirk_reset_page() which takes PFN as input. > So why not considering option 2? > > 2. keep tdx_quirk_reset_page() as-is for the cases of > tdx_reclaim_page() and tdx_reclaim_td_control_pages() that have the > struct page. But only change tdx_sept_remove_private_spte() to use > tdx_quirk_reset_paddr() directly. > > It will need export tdx_quirk_reset_paddr() for KVM. I think it will be OK? I don't think it's necessary. But if we have to export an extra API, IMHO, tdx_quirk_reset_pfn() is better than tdx_quirk_reset_paddr(). Otherwise, why not only expose tdx_quirk_reset_paddr()?
On 3/19/26 09:56, Yan Zhao wrote: > On Thu, Mar 19, 2026 at 04:56:10PM +0800, Xiaoyao Li wrote: >> So why not considering option 2? >> >> 2. keep tdx_quirk_reset_page() as-is for the cases of >> tdx_reclaim_page() and tdx_reclaim_td_control_pages() that have the >> struct page. But only change tdx_sept_remove_private_spte() to use >> tdx_quirk_reset_paddr() directly. >> >> It will need export tdx_quirk_reset_paddr() for KVM. I think it will be OK? > I don't think it's necessary. But if we have to export an extra API, IMHO, > tdx_quirk_reset_pfn() is better than tdx_quirk_reset_paddr(). Otherwise, > why not only expose tdx_quirk_reset_paddr()? That works for me, it seems the cleanest. Paolo
© 2016 - 2026 Red Hat, Inc.