Add helper functions and definitions needed to use broadcast TLB
invalidation on AMD EPYC 3 and newer CPUs.
All the functions defined in invlpgb.h are used later in the series.
Compile time disabling X86_FEATURE_INVLPGB when the config
option is not set allows the compiler to omit unnecessary code.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
arch/x86/include/asm/disabled-features.h | 8 +-
arch/x86/include/asm/tlb.h | 98 ++++++++++++++++++++++++
2 files changed, 105 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index c492bdc97b05..625a89259968 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -129,6 +129,12 @@
#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
#endif
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+#define DISABLE_INVLPGB 0
+#else
+#define DISABLE_INVLPGB (1 << (X86_FEATURE_INVLPGB & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -146,7 +152,7 @@
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13 0
+#define DISABLED_MASK13 (DISABLE_INVLPGB)
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 77f52bc1578a..91c9a4da3ace 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
static inline void tlb_flush(struct mmu_gather *tlb)
{
@@ -25,4 +28,99 @@ static inline void invlpg(unsigned long addr)
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
+
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond
+ * the first page, while __invlpgb gets the more human readable number of
+ * pages to invalidate.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ bool pmd_stride, u8 flags)
+{
+ u32 edx = (pcid << 16) | asid;
+ u32 ecx = (pmd_stride << 31) | (nr_pages - 1);
+ u64 rax = addr | flags;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __tlbsync(void)
+{
+ /*
+ * tlbsync waits for invlpgb instructions originating on the
+ * same CPU to have completed. Print a warning if we could have
+ * migrated, and might not be waiting on all the invlpgbs issued
+ * during this TLB invalidation sequence.
+ */
+ cant_migrate();
+
+ /* TLBSYNC: supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first can be used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_VA BIT(0)
+#define INVLPGB_PCID BIT(1)
+#define INVLPGB_ASID BIT(2)
+#define INVLPGB_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FINAL_ONLY BIT(4)
+#define INVLPGB_INCLUDE_NESTED BIT(5)
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr,
+ bool pmd_stride)
+{
+ __invlpgb(0, pcid, addr, nr, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb(0, pcid, 0, 1, 0, INVLPGB_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ __invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL);
+ __tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr, 0, INVLPGB_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ __invlpgb(0, 0, 0, 1, 0, 0);
+ __tlbsync();
+}
+
#endif /* _ASM_X86_TLB_H */
--
2.47.1
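For reference, a minimal caller of these nosync helpers could look like the
sketch below (hypothetical, not part of the series; the function name is made
up). The point is that several weakly ordered INVLPGBs can be batched and a
single TLBSYNC then waits for all of them, which is why migration has to be
prevented in between:

	/* Hypothetical example: flush @nr 4k pages for @pcid on all CPUs. */
	static void example_broadcast_flush(unsigned long pcid, unsigned long addr, u16 nr)
	{
		/* __tlbsync() must run on the CPU that issued the INVLPGBs. */
		preempt_disable();
		invlpgb_flush_user_nr_nosync(pcid, addr, nr, false);
		__tlbsync();
		preempt_enable();
	}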
On Tue, Feb 25, 2025 at 10:00:38PM -0500, Rik van Riel wrote:
> Add helper functions and definitions needed to use broadcast TLB
> invalidation on AMD EPYC 3 and newer CPUs.
>
> All the functions defined in invlpgb.h are used later in the series.
>
> Compile time disabling X86_FEATURE_INVLPGB when the config
> option is not set allows the compiler to omit unnecessary code.
>
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Tested-by: Manali Shukla <Manali.Shukla@amd.com>
> Tested-by: Brendan Jackman <jackmanb@google.com>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Acked-by: Dave Hansen <dave.hansen@intel.com>
> ---
> arch/x86/include/asm/disabled-features.h | 8 +-
> arch/x86/include/asm/tlb.h | 98 ++++++++++++++++++++++++
> 2 files changed, 105 insertions(+), 1 deletion(-)
My edits ontop.
x86/cpu has dropped {disabled,required}-features.h in favor of a new, better
mechanism to compile-time disable X86 features, see below.
--- /tmp/current.patch 2025-02-28 20:44:40.765404608 +0100
+++ /tmp/0001-x86-mm-Add-INVLPGB-support-code.patch 2025-02-28 20:44:18.492326903 +0100
@@ -1,55 +1,38 @@
+From ce22946ea806ae459b4d88767a59b010e70682d5 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Tue, 25 Feb 2025 22:00:38 -0500
-Subject: x86/mm: Add INVLPGB support code
+Date: Fri, 28 Feb 2025 20:32:30 +0100
+Subject: [PATCH] x86/mm: Add INVLPGB support code
Add helper functions and definitions needed to use broadcast TLB
-invalidation on AMD EPYC 3 and newer CPUs.
+invalidation on AMD CPUs.
-All the functions defined in invlpgb.h are used later in the series.
-
-Compile time disabling X86_FEATURE_INVLPGB when the config
-option is not set allows the compiler to omit unnecessary code.
+ [ bp:
+ - Cleanup commit message
+ - port it to new Kconfig.cpufeatures machinery
+ - add a comment about flushing any PCID and ASID ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
-Acked-by: Dave Hansen <dave.hansen@intel.com>
-Tested-by: Manali Shukla <Manali.Shukla@amd.com>
-Tested-by: Brendan Jackman <jackmanb@google.com>
-Tested-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com
---
- arch/x86/include/asm/disabled-features.h | 8 +-
- arch/x86/include/asm/tlb.h | 98 ++++++++++++++++++++++++
- 2 files changed, 105 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
-index c492bdc97b05..625a89259968 100644
---- a/arch/x86/include/asm/disabled-features.h
-+++ b/arch/x86/include/asm/disabled-features.h
-@@ -129,6 +129,12 @@
- #define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
- #endif
-
-+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
-+#define DISABLE_INVLPGB 0
-+#else
-+#define DISABLE_INVLPGB (1 << (X86_FEATURE_INVLPGB & 31))
-+#endif
-+
- /*
- * Make sure to add features to the correct mask
- */
-@@ -146,7 +152,7 @@
- #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
- #define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
--#define DISABLED_MASK13 0
-+#define DISABLED_MASK13 (DISABLE_INVLPGB)
- #define DISABLED_MASK14 0
- #define DISABLED_MASK15 0
- #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
+ arch/x86/Kconfig.cpufeatures | 4 ++
+ arch/x86/include/asm/tlb.h | 101 +++++++++++++++++++++++++++++++++++
+ 2 files changed, 105 insertions(+)
+
+diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures
+index 5dcc49d928c5..f9af51205f07 100644
+--- a/arch/x86/Kconfig.cpufeatures
++++ b/arch/x86/Kconfig.cpufeatures
+@@ -195,3 +195,7 @@ config X86_DISABLED_FEATURE_FRED
+ config X86_DISABLED_FEATURE_SEV_SNP
+ def_bool y
+ depends on !KVM_AMD_SEV
++
++config X86_DISABLED_FEATURE_BROADCAST_TLB_FLUSH
++ def_bool y
++ depends on !X86_BROADCAST_TLB_FLUSH
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
-index 77f52bc1578a..91c9a4da3ace 100644
+index 77f52bc1578a..45d9c7687d61 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
@@ -62,7 +45,7 @@ index 77f52bc1578a..91c9a4da3ace 100644
static inline void tlb_flush(struct mmu_gather *tlb)
{
-@@ -25,4 +28,99 @@ static inline void invlpg(unsigned long addr)
+@@ -25,4 +28,102 @@ static inline void invlpg(unsigned long addr)
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
@@ -157,11 +140,14 @@ index 77f52bc1578a..91c9a4da3ace 100644
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
++ /*
++ * @addr=0 means both rax[1] (valid PCID) and rax[2] (valid ASID) are clear
++ * so flush *any* PCID and ASID.
++ */
+ __invlpgb(0, 0, 0, 1, 0, 0);
+ __tlbsync();
+}
-+
#endif /* _ASM_X86_TLB_H */
--
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 2/28/25 11:47, Borislav Petkov wrote:
> @@ -157,11 +140,14 @@ index 77f52bc1578a..91c9a4da3ace 100644
> +/* Flush all mappings for all PCIDs except globals. */
> +static inline void invlpgb_flush_all_nonglobals(void)
> +{
> ++ /*
> ++ * @addr=0 means both rax[1] (valid PCID) and rax[2] (valid ASID) are clear
> ++ * so flush *any* PCID and ASID.
> ++ */
> + __invlpgb(0, 0, 0, 1, 0, 0);
> + __tlbsync();
> +}
I had a bit of an allergic reaction to all of the magic numbers.
Could we do something like the attached where we give a _few_ of the
magic numbers some symbolic names?
For instance, instead of passing around a bool for pmd_stride, this uses
an enum. It also explicitly separates things that are setting
pmd_stride=0 but are really saying "this is a 4k stride" from things
that set pmd_stride=0 but are for operations that don't _have_ a stride.
Here's a plain diff if you just want to squish it in.
On Mon, Mar 03, 2025 at 11:23:58AM -0800, Dave Hansen wrote:
> Here's a plain diff if you just want to squish it in.
> diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
> index 5375145eb9596..3bd617c204346 100644
> --- a/arch/x86/include/asm/tlb.h
> +++ b/arch/x86/include/asm/tlb.h
> @@ -28,6 +28,11 @@ static inline void invlpg(unsigned long addr)
> asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
> }
>
> +enum invlpgb_stride {
Right, this is an address stride, as the text calls it.
> + NO_STRIDE = 0,
> + PTE_STRIDE = 0,
Ok, so those are confusing. No stride is PTE stride so let's just zap
NO_STRIDE.
> + PMD_STRIDE = 1
> +};
>
> /*
> * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
...
> static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
> unsigned long addr,
> u16 nr,
> bool pmd_stride)
You're relying on the fact that true == PMD_STRIDE and false == PTE_STRIDE, but
let's make it Right(tm), see below.
Rest looks ok.
IOW, I'm merging this into patch 3:
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 5375145eb959..6718835c3b0c 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -28,6 +28,10 @@ static inline void invlpg(unsigned long addr)
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
+enum addr_stride {
+ PTE_STRIDE = 0,
+ PMD_STRIDE = 1
+};
/*
* INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
@@ -48,10 +52,10 @@ static inline void invlpg(unsigned long addr)
*/
static inline void __invlpgb(unsigned long asid, unsigned long pcid,
unsigned long addr, u16 nr_pages,
- bool pmd_stride, u8 flags)
+ enum addr_stride stride, u8 flags)
{
u32 edx = (pcid << 16) | asid;
- u32 ecx = (pmd_stride << 31) | (nr_pages - 1);
+ u32 ecx = (stride << 31) | (nr_pages - 1);
u64 rax = addr | flags;
/* The low bits in rax are for flags. Verify addr is clean. */
@@ -78,33 +82,38 @@ static inline void __tlbsync(void)
/*
* INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
* of the three. For example:
- * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
- * - INVLPGB_PCID: invalidate all TLB entries matching the PCID
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
*
- * The first can be used to invalidate (kernel) mappings at a particular
+ * The first is used to invalidate (kernel) mappings at a particular
* address across all processes.
*
* The latter invalidates all TLB entries matching a PCID.
*/
-#define INVLPGB_VA BIT(0)
-#define INVLPGB_PCID BIT(1)
-#define INVLPGB_ASID BIT(2)
-#define INVLPGB_INCLUDE_GLOBAL BIT(3)
-#define INVLPGB_FINAL_ONLY BIT(4)
-#define INVLPGB_INCLUDE_NESTED BIT(5)
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
- u16 nr,
- bool pmd_stride)
+ u16 nr, bool stride)
{
- __invlpgb(0, pcid, addr, nr, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+ __invlpgb(0, pcid, addr, nr, str, flags);
}
/* Flush all mappings for a given PCID, not including globals. */
static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
- __invlpgb(0, pcid, 0, 1, 0, INVLPGB_PCID);
+ __invlpgb(0, pcid, 0, 1, PTE_STRIDE, INVLPGB_FLAG_PCID);
}
/* Flush all mappings, including globals, for all PCIDs. */
@@ -117,21 +126,21 @@ static inline void invlpgb_flush_all(void)
* as it is cheaper.
*/
guard(preempt)();
- __invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL);
+ __invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
__tlbsync();
}
/* Flush addr, including globals, for all PCIDs. */
static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
- __invlpgb(0, 0, addr, nr, 0, INVLPGB_INCLUDE_GLOBAL);
+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
}
/* Flush all mappings for all PCIDs except globals. */
static inline void invlpgb_flush_all_nonglobals(void)
{
guard(preempt)();
- __invlpgb(0, 0, 0, 1, 0, 0);
+ __invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_MODE_ALL_NONGLOBALS);
__tlbsync();
}
#endif /* _ASM_X86_TLB_H */
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 3/4/25 03:00, Borislav Petkov wrote:
> On Mon, Mar 03, 2025 at 11:23:58AM -0800, Dave Hansen wrote:
>> Here's a plain diff if you just want to squish it in.
>
>> diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
>> index 5375145eb9596..3bd617c204346 100644
>> --- a/arch/x86/include/asm/tlb.h
>> +++ b/arch/x86/include/asm/tlb.h
>> @@ -28,6 +28,11 @@ static inline void invlpg(unsigned long addr)
>> asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
>> }
>>
>> +enum invlpgb_stride {
>
> Right, this is an address stride, as the text calls it.
>
>> + NO_STRIDE = 0,
>> + PTE_STRIDE = 0,
>
> Ok, so those are confusing. No stride is PTE stride so let's just zap
> NO_STRIDE.
Passing "PTE_STRIDE" to an operation that doesn't have a stride is
pretty confusing too.
...
> /* Flush all mappings, including globals, for all PCIDs. */
> @@ -117,21 +126,21 @@ static inline void invlpgb_flush_all(void)
> * as it is cheaper.
> */
> guard(preempt)();
> - __invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL);
> + __invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
> __tlbsync();
> }
This one, for example. It's not flushing PTEs and doesn't have a start
address or nr>0.
So, we could have the enum be totally divorced from the hardware type:
NO_STRIDE,
PTE_STRIDE,
PMD_STRIDE
and decode it at the end:
if (stride == PMD_STRIDE)
foo | PMD_STRIDE_BIT;
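Spelled out, that could look like the following (a hypothetical sketch of the
idea, based on the __invlpgb() body from the patch; PMD_STRIDE_BIT is an
illustrative name for ECX[31], not something defined in the series):

	enum addr_stride { NO_STRIDE, PTE_STRIDE, PMD_STRIDE };

	#define PMD_STRIDE_BIT	BIT(31)	/* ECX[31]: 2M instead of 4k stride */

	static inline void __invlpgb(unsigned long asid, unsigned long pcid,
				     unsigned long addr, u16 nr_pages,
				     enum addr_stride stride, u8 flags)
	{
		u32 edx = (pcid << 16) | asid;
		u32 ecx = nr_pages - 1;
		u64 rax = addr | flags;

		/* Decode the software-only enum into the hardware bit at the end. */
		if (stride == PMD_STRIDE)
			ecx |= PMD_STRIDE_BIT;

		asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
	}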
On Tue, Mar 04, 2025 at 07:10:13AM -0800, Dave Hansen wrote:
> So, we could have the enum be totally divorced from the hardware type:
>
> NO_STRIDE,
> PTE_STRIDE,
> PMD_STRIDE
How about we completely hide that NO_STRIDE thing and do a __invlpgb_all()
"sub-helper" which is basically telling you it is invalidating all kinds of
TLB entries and stride does not apply there:
---
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index e8561a846754..361b3dde2656 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -66,6 +66,12 @@ static inline void __invlpgb(unsigned long asid, unsigned long pcid,
asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
}
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages, u8 flags)
+{
+ __invlpgb(asid, pcid, addr, nr_pages, 0, flags);
+}
+
static inline void __tlbsync(void)
{
/*
@@ -84,6 +90,8 @@ static inline void __tlbsync(void)
static inline void __invlpgb(unsigned long asid, unsigned long pcid,
unsigned long addr, u16 nr_pages,
enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages, u8 flags) { }
static inline void __tlbsync(void) { }
#endif
@@ -121,7 +129,7 @@ static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
/* Flush all mappings for a given PCID, not including globals. */
static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
- __invlpgb(0, pcid, 0, 1, PTE_STRIDE, INVLPGB_FLAG_PCID);
+ __invlpgb_all(0, pcid, 0, 1, INVLPGB_FLAG_PCID);
}
/* Flush all mappings, including globals, for all PCIDs. */
@@ -134,7 +142,7 @@ static inline void invlpgb_flush_all(void)
* as it is cheaper.
*/
guard(preempt)();
- __invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __invlpgb_all(0, 0, 0, 1, INVLPGB_FLAG_INCLUDE_GLOBAL);
__tlbsync();
}
@@ -148,7 +156,7 @@ static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
static inline void invlpgb_flush_all_nonglobals(void)
{
guard(preempt)();
- __invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_MODE_ALL_NONGLOBALS);
+ __invlpgb_all(0, 0, 0, 1, INVLPGB_MODE_ALL_NONGLOBALS);
__tlbsync();
}
#endif /* _ASM_X86_TLB_H */
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 3/4/25 08:19, Borislav Petkov wrote:
> +static inline void __invlpgb_all(unsigned long asid, unsigned long pcid,
> + unsigned long addr, u16 nr_pages, u8 flags)
> +{
> + __invlpgb(asid, pcid, addr, nr_pages, 0, flags);
> +}
Why would __invlpgb_all() need an 'addr' or 'nr_pages'? Shouldn't those be 0?
It's _better_ of course when it happens at a single site and it's close
to a prototype for __invlpgb(). But it's still a magic '0' that it's
impossible to make sense of without looking at the prototype.
Looking at the APM again... there really are three possible values for
ECX[31]:
0: increment by 4k
1: increment by 2M
X: Don't care, no increment is going to happen
What you wrote above could actually be written:
__invlpgb(asid, pcid, addr, nr_pages, 1, flags);
so the 0/1 is _actually_ completely random and arbitrary as far as the
spec goes.
Why does it matter?
It enables you to do sanity checking. For example, we could actually
enforce a rule that "no stride" can't be paired with any of the
per-address invalidation characteristics:
if (stride == NO_STRIDE) {
WARN_ON(flags & INVLPGB_FLAG_VA);
WARN_ON(addr);
WARN_ON(nr_pages);
}
That's impossible if you pass a 'bool' in.
But, honestly, I'm deep into nitpick mode here. I think differentiating
the three cases is worth it, but it's also not the hill I'm going to die
on. ;)
On Tue, Mar 04, 2025 at 08:57:30AM -0800, Dave Hansen wrote:
> Why would __invlpgb_all() need an 'addr' or 'nr_pages'? Shouldn't those be 0?
Yap, good idea. It makes the _all helper even better:
static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
{
__invlpgb(asid, pcid, 0, 1, 0, flags);
}
> It's _better_ of course when it happens at a single site and it's close
> to a prototype for __invlpgb(). But it's still a magic '0' that it's
> impossible to make sense of without looking at the prototype.
Yes.
> Looking at the APM again... there really are three possible values for
> ECX[31]:
>
> 0: increment by 4k
> 1: increment by 2M
> X: Don't care, no increment is going to happen
>
> What you wrote above could actually be written:
>
> __invlpgb(asid, pcid, addr, nr_pages, 1, flags);
>
> so the 0/1 is _actually_ completely random and arbitrary as far as the
> spec goes.
Yes.
> Why does it matter?
>
> It enables you to do sanity checking. For example, we could actually
> enforce a rule that "no stride" can't be paired with any of the
> per-address invalidation characteristics:
>
> if (stride == NO_STRIDE) {
> WARN_ON(flags & INVLPGB_FLAG_VA);
> WARN_ON(addr);
> WARN_ON(nr_pages);
> }
>
> That's impossible if you pass a 'bool' in.
>
> But, honestly, I'm deep into nitpick mode here. I think differentiating
> the three cases is worth it, but it's also not the hill I'm going to die
> on. ;)
Yap, and now I've massaged it so much so that it doesn't really need that
checking. Because I have exactly two calls which use the stride:
1.
static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
u16 nr, bool stride)
{
enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
__invlpgb(0, pcid, addr, nr, str, flags);
}
This one is fine - I verify it.
2.
/* Flush addr, including globals, for all PCIDs. */
static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
__invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
}
This one controls it already.
So the only case where something could go bad is when one would use
__invlpgb() directly and that should hopefully be caught early enough.
But if you really want, I could add sanitization to __invlpgb() to massage it
into the right stride. And print a single warning - the big fat WARN* in
inline functions are probably too much. Hm, I dunno...
Current diff ontop:
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index e8561a846754..8ab21487d6ee 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -66,6 +66,11 @@ static inline void __invlpgb(unsigned long asid, unsigned long pcid,
asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
}
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+ __invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
static inline void __tlbsync(void)
{
/*
@@ -84,6 +89,7 @@ static inline void __tlbsync(void)
static inline void __invlpgb(unsigned long asid, unsigned long pcid,
unsigned long addr, u16 nr_pages,
enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
static inline void __tlbsync(void) { }
#endif
@@ -121,7 +127,7 @@ static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
/* Flush all mappings for a given PCID, not including globals. */
static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
- __invlpgb(0, pcid, 0, 1, PTE_STRIDE, INVLPGB_FLAG_PCID);
+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
}
/* Flush all mappings, including globals, for all PCIDs. */
@@ -134,7 +140,7 @@ static inline void invlpgb_flush_all(void)
* as it is cheaper.
*/
guard(preempt)();
- __invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
__tlbsync();
}
@@ -148,7 +154,7 @@ static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
static inline void invlpgb_flush_all_nonglobals(void)
{
guard(preempt)();
- __invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_MODE_ALL_NONGLOBALS);
+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
__tlbsync();
}
#endif /* _ASM_X86_TLB_H */
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 2/25/25 19:00, Rik van Riel wrote:
> Add helper functions and definitions needed to use broadcast TLB
> invalidation on AMD EPYC 3 and newer CPUs.
I don't know if I mentioned it earlier, but I'd leave this explanation
of where the feature shows up for the cover letter or the Documentation/.
On Tue, Feb 25, 2025 at 10:00:38PM -0500, Rik van Riel wrote:
> Add helper functions and definitions needed to use broadcast TLB
> invalidation on AMD EPYC 3 and newer CPUs.
>
> All the functions defined in invlpgb.h are used later in the series.
Uff, that's tlb.h now. As already said. :-\
Btw, this is why there's no point to write *what* the patch does - that is
visible from the diff itself. This sentence is simply not needed.
> Compile time disabling X86_FEATURE_INVLPGB when the config
> option is not set allows the compiler to omit unnecessary code.
>
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Tested-by: Manali Shukla <Manali.Shukla@amd.com>
> Tested-by: Brendan Jackman <jackmanb@google.com>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Acked-by: Dave Hansen <dave.hansen@intel.com>
And I asked you already but still crickets:
What do those Tested-by tags mean if you keep changing the patches?!
https://lore.kernel.org/r/20250224123142.GFZ7xmruuyrc2Wy0r7@fat_crate.local
...
IOW, you need to drop those tags.
> +/* Flush all mappings for all PCIDs except globals. */
This comment should state that addr=0 means both rax[1] (valid PCID) and
rax[2] (valid ASID) are clear and this means: flush *any* PCID and ASID. So
that it is clear.
> +static inline void invlpgb_flush_all_nonglobals(void)
> +{
> + __invlpgb(0, 0, 0, 1, 0, 0);
> + __tlbsync();
> +}
> +
> #endif /* _ASM_X86_TLB_H */
> --
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
The following commit has been merged into the x86/core branch of tip:
Commit-ID: b7aa05cbdc52d61119b0e736bb3e288735f860fe
Gitweb: https://git.kernel.org/tip/b7aa05cbdc52d61119b0e736bb3e288735f860fe
Author: Rik van Riel <riel@surriel.com>
AuthorDate: Fri, 28 Feb 2025 20:32:30 +01:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 19 Mar 2025 11:12:25 +01:00
x86/mm: Add INVLPGB support code
Add helper functions and definitions needed to use broadcast TLB
invalidation on AMD CPUs.
[ bp:
- Cleanup commit message
- Improve and expand comments
- push the preemption guards inside the invlpgb* helpers
- merge improvements from dhansen
- add !CONFIG_BROADCAST_TLB_FLUSH function stubs because Clang
can't do DCE properly yet and looks at the inline asm and
complains about it getting a u64 argument on 32-bit code ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com
---
arch/x86/include/asm/tlb.h | 132 ++++++++++++++++++++++++++++++++++++-
1 file changed, 132 insertions(+)
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 77f52bc..31f6db4 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
static inline void tlb_flush(struct mmu_gather *tlb)
{
@@ -25,4 +28,133 @@ static inline void invlpg(unsigned long addr)
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
+enum addr_stride {
+ PTE_STRIDE = 0,
+ PMD_STRIDE = 1
+};
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond
+ * the first page, while __invlpgb gets the more human readable number of
+ * pages to invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If none of these bits are set, *any*
+ * address in the specified range matches.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride stride, u8 flags)
+{
+ u32 edx = (pcid << 16) | asid;
+ u32 ecx = (stride << 31) | (nr_pages - 1);
+ u64 rax = addr | flags;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+ __invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
+static inline void __tlbsync(void)
+{
+ /*
+ * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+ * to have completed. Print a warning if the task has been migrated,
+ * and might not be waiting on all the INVLPGBs issued during this TLB
+ * invalidation sequence.
+ */
+ cant_migrate();
+
+ /* TLBSYNC: supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool stride)
+{
+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+ __invlpgb(0, pcid, addr, nr, str, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ /*
+ * TLBSYNC at the end needs to make sure all flushes done on the
+ * current CPU have been executed system-wide. Therefore, make
+ * sure nothing gets migrated in-between; disable preemption,
+ * as it is cheaper.
+ */
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
+ __tlbsync();
+}
#endif /* _ASM_X86_TLB_H */
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: 6272c3a217e5837c72c6714d1e7eddd34254fac3
Gitweb: https://git.kernel.org/tip/6272c3a217e5837c72c6714d1e7eddd34254fac3
Author: Rik van Riel <riel@surriel.com>
AuthorDate: Fri, 28 Feb 2025 20:32:30 +01:00
Committer: Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Wed, 05 Mar 2025 17:19:46 +01:00
x86/mm: Add INVLPGB support code
Add helper functions and definitions needed to use broadcast TLB
invalidation on AMD CPUs.
[ bp:
- Cleanup commit message
- Improve and expand comments
- push the preemption guards inside the invlpgb* helpers
- merge improvements from dhansen
- add !CONFIG_BROADCAST_TLB_FLUSH function stubs because Clang
can't do DCE properly yet and looks at the inline asm and
complains about it getting a u64 argument on 32-bit code ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com
---
arch/x86/include/asm/tlb.h | 132 ++++++++++++++++++++++++++++++++++++-
1 file changed, 132 insertions(+)
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 77f52bc..31f6db4 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
static inline void tlb_flush(struct mmu_gather *tlb)
{
@@ -25,4 +28,133 @@ static inline void invlpg(unsigned long addr)
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
+enum addr_stride {
+ PTE_STRIDE = 0,
+ PMD_STRIDE = 1
+};
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond
+ * the first page, while __invlpgb gets the more human readable number of
+ * pages to invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If none of these bits are set, *any*
+ * address in the specified range matches.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride stride, u8 flags)
+{
+ u32 edx = (pcid << 16) | asid;
+ u32 ecx = (stride << 31) | (nr_pages - 1);
+ u64 rax = addr | flags;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+ __invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
+static inline void __tlbsync(void)
+{
+ /*
+ * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+ * to have completed. Print a warning if the task has been migrated,
+ * and might not be waiting on all the INVLPGBs issued during this TLB
+ * invalidation sequence.
+ */
+ cant_migrate();
+
+ /* TLBSYNC: supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool stride)
+{
+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+ __invlpgb(0, pcid, addr, nr, str, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ /*
+ * TLBSYNC at the end needs to make sure all flushes done on the
+ * current CPU have been executed system-wide. Therefore, make
+ * sure nothing gets migrated in-between; disable preemption,
+ * as it is cheaper.
+ */
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
+ __tlbsync();
+}
#endif /* _ASM_X86_TLB_H */
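For illustration, a call site gating on the CPU feature could look like this
(a hypothetical sketch, not from this series; later patches wire these
helpers into the real flush paths). When the Kconfig option is off, the
feature is compile-time disabled, so cpu_feature_enabled() folds to false and
the INVLPGB branch is discarded entirely:

	/* Hypothetical call site: use broadcast invalidation when available,
	 * fall back to the existing IPI-based flush otherwise. */
	static void example_flush_everything(void)
	{
		if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
			invlpgb_flush_all();	/* broadcast; globals, all PCIDs */
		else
			flush_tlb_all();	/* IPI-based path */
	}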