From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
The Physical Address Metadata Table (PAMT) holds TDX metadata for physical
memory and must be allocated by the kernel during TDX module
initialization.
The exact size of the required PAMT memory is determined by the TDX module
and may vary between TDX module versions. Currently it is approximately
0.4% of the system memory. This is a significant commitment, especially if
it is not known upfront whether the machine will run any TDX guests.
For normal PAMT, each memory region that the TDX module might use (TDMR)
needs three separate PAMT allocations. One for each supported page size
(1GB, 2MB, 4KB).
At a high level, Dynamic PAMT still has the 1GB and 2MB levels allocated
on TDX module initialization, but the 4KB level allocated dynamically at
TD runtime. However, in the details, the TDX module still needs some per
4KB page data. The TDX module exposed how many bits per page need to be
allocated (currently it is 1). The bits-per-page value can then be used to
calculate the size to pass in place of the 4KB allocations in the TDMR,
which TDX specs call "PAMT_PAGE_BITMAP".
So in effect, Dynamic PAMT just needs a different (smaller) size
allocation for the 4KB level part of the allocation. Although it is
functionally something different, it is passed in the same way the 4KB page
size PAMT allocation is.
Begin to implement Dynamic PAMT in the kernel by reading the bits-per-page
needed for Dynamic PAMT. Calculate the size needed for the bitmap,
and use it instead of the 4KB size determined for normal PAMT, in the case
of Dynamic PAMT. In doing so, reduce the static allocations to
approximately 0.004%, a 100x improvement.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
[Enhanced log]
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
v3:
- Simplify pamt_4k_size calculation after addition of refactor patch
- Write log
---
arch/x86/include/asm/tdx.h | 5 +++++
arch/x86/include/asm/tdx_global_metadata.h | 1 +
arch/x86/virt/vmx/tdx/tdx.c | 19 ++++++++++++++++++-
arch/x86/virt/vmx/tdx/tdx_global_metadata.c | 3 +++
4 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index eedc479d23f5..b9bb052f4daa 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -107,6 +107,11 @@ int tdx_enable(void);
const char *tdx_dump_mce_info(struct mce *m);
const struct tdx_sys_info *tdx_get_sysinfo(void);
+static inline bool tdx_supports_dynamic_pamt(const struct tdx_sys_info *sysinfo)
+{
+ return false; /* To be enabled when kernel is ready */
+}
+
int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
diff --git a/arch/x86/include/asm/tdx_global_metadata.h b/arch/x86/include/asm/tdx_global_metadata.h
index 060a2ad744bf..5eb808b23997 100644
--- a/arch/x86/include/asm/tdx_global_metadata.h
+++ b/arch/x86/include/asm/tdx_global_metadata.h
@@ -15,6 +15,7 @@ struct tdx_sys_info_tdmr {
u16 pamt_4k_entry_size;
u16 pamt_2m_entry_size;
u16 pamt_1g_entry_size;
+ u8 pamt_page_bitmap_entry_bits;
};
struct tdx_sys_info_td_ctrl {
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 38dae825bbb9..4e4aa8927550 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -440,6 +440,18 @@ static int fill_out_tdmrs(struct list_head *tmb_list,
return 0;
}
+static unsigned long tdmr_get_pamt_bitmap_sz(struct tdmr_info *tdmr)
+{
+ unsigned long pamt_sz, nr_pamt_entries;
+ int bits_per_entry;
+
+ bits_per_entry = tdx_sysinfo.tdmr.pamt_page_bitmap_entry_bits;
+ nr_pamt_entries = tdmr->size >> PAGE_SHIFT;
+ pamt_sz = DIV_ROUND_UP(nr_pamt_entries * bits_per_entry, BITS_PER_BYTE);
+
+ return ALIGN(pamt_sz, PAGE_SIZE);
+}
+
/*
* Calculate PAMT size given a TDMR and a page size. The returned
* PAMT size is always aligned up to 4K page boundary.
@@ -504,7 +516,12 @@ static int tdmr_set_up_pamt(struct tdmr_info *tdmr,
* Calculate the PAMT size for each TDX supported page size
* and the total PAMT size.
*/
- tdmr->pamt_4k_size = tdmr_get_pamt_sz(tdmr, TDX_PS_4K, pamt_entry_size);
+ if (tdx_supports_dynamic_pamt(&tdx_sysinfo)) {
+ /* With Dynamic PAMT, PAMT_4K is replaced with a bitmap */
+ tdmr->pamt_4k_size = tdmr_get_pamt_bitmap_sz(tdmr);
+ } else {
+ tdmr->pamt_4k_size = tdmr_get_pamt_sz(tdmr, TDX_PS_4K, pamt_entry_size);
+ }
tdmr->pamt_2m_size = tdmr_get_pamt_sz(tdmr, TDX_PS_2M, pamt_entry_size);
tdmr->pamt_1g_size = tdmr_get_pamt_sz(tdmr, TDX_PS_1G, pamt_entry_size);
tdmr_pamt_size = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size;
diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
index 13ad2663488b..683925bcc9eb 100644
--- a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
+++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
@@ -33,6 +33,9 @@ static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr)
sysinfo_tdmr->pamt_2m_entry_size = val;
if (!ret && !(ret = read_sys_metadata_field(0x9100000100000012, &val)))
sysinfo_tdmr->pamt_1g_entry_size = val;
+ if (!ret && tdx_supports_dynamic_pamt(&tdx_sysinfo) &&
+ !(ret = read_sys_metadata_field(0x9100000100000013, &val)))
+ sysinfo_tdmr->pamt_page_bitmap_entry_bits = val;
return ret;
}
--
2.51.0
+ Dan, On 9/19/2025 7:22 AM, Rick Edgecombe wrote: > diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c > index 13ad2663488b..683925bcc9eb 100644 > --- a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c > +++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c > @@ -33,6 +33,9 @@ static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr) > sysinfo_tdmr->pamt_2m_entry_size = val; > if (!ret && !(ret = read_sys_metadata_field(0x9100000100000012, &val))) > sysinfo_tdmr->pamt_1g_entry_size = val; > + if (!ret && tdx_supports_dynamic_pamt(&tdx_sysinfo) && > + !(ret = read_sys_metadata_field(0x9100000100000013, &val))) > + sysinfo_tdmr->pamt_page_bitmap_entry_bits = val; > > return ret; > } It looks like the update is not produced by the script[1], but manually, right? Looking at the history git log arch/x86/virt/vmx/tdx/tdx_global_metadata.c It looks the expectation is always adding new fields by updating the script[1] and running it with the latest 'tdx_global_metadata.c'. Dan Williams also expressed internally that we should have checked the script[1] into the kernel. I agree with him and it's never too late to do it. [1]https://lore.kernel.org/kvm/0853b155ec9aac09c594caa60914ed6ea4dc0a71.camel@intel.com/
On Fri, 2025-09-26 at 16:41 +0800, Xiaoyao Li wrote: > It looks like the update is not produced by the script[1], but > manually, right? > > Looking at the history > > git log arch/x86/virt/vmx/tdx/tdx_global_metadata.c > > It looks the expectation is always adding new fields by updating the > script[1] and running it with the latest 'tdx_global_metadata.c'. Yes, it's not symmetrical. It was probably added manually. I don't see why we actually need the the part that makes it unsymmetrical (tdx_supports_dynamic_pamt() check). In fact I think it's bad because it depends on specific call order of the metadata reading to be valid. Kirill, any reason we can't drop it? As for a strict requirement to use the script, it wasn't my understanding. > > Dan Williams also expressed internally that we should have checked > the script[1] into the kernel. I agree with him and it's never too > lateto do it. > > [1] > https://lore.kernel.org/kvm/0853b155ec9aac09c594caa60914ed6ea4dc0a71.camel@intel.com > / Especially while the ABI docs file format is being revisited, it doesn't seem like the right time to check it in. For this patch we could have used it, however at this point it would amount to verifying that it output the same few lines of code. Would you like to do this test? (Assuming we can drop the tdx_supports_dynamic_pamt() check) Probably a note that the code generation matched our manual implementation would be a point of reassurance to add to the log.
On 9/26/25 14:57, Edgecombe, Rick P wrote: > As for a strict requirement to use the script, it wasn't my > understanding. So, remember that all of this started because the TDX docs said that they didn't document some necessary stuff and to go look in a JSON file instead. The JSON file wasn't particularly human-readable, thus the script was born to read it. But it turns out that the JSON isn't stable. It also isn't authoritative enough to write code from by itself. Nobody realized this when this adventure started. Anyway, long story short, the JSON experiment failed and we're back to normal docs for TDX ABI definitions. Thus, the script can go away and maybe we should make that code a bit less ugly now that humans are going to be editing it all the time now.
On 9/19/2025 7:22 AM, Rick Edgecombe wrote: [...] > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c > index 38dae825bbb9..4e4aa8927550 100644 > --- a/arch/x86/virt/vmx/tdx/tdx.c > +++ b/arch/x86/virt/vmx/tdx/tdx.c > @@ -440,6 +440,18 @@ static int fill_out_tdmrs(struct list_head *tmb_list, > return 0; > } > > +static unsigned long tdmr_get_pamt_bitmap_sz(struct tdmr_info *tdmr) > +{ > + unsigned long pamt_sz, nr_pamt_entries; > + int bits_per_entry; > + > + bits_per_entry = tdx_sysinfo.tdmr.pamt_page_bitmap_entry_bits; > + nr_pamt_entries = tdmr->size >> PAGE_SHIFT; > + pamt_sz = DIV_ROUND_UP(nr_pamt_entries * bits_per_entry, BITS_PER_BYTE); > + > + return ALIGN(pamt_sz, PAGE_SIZE); Nit: return PAGE_ALIGN(pamt_sz);
© 2016 - 2025 Red Hat, Inc.