[PATCH v14 03/13] x86/mm: add INVLPGB support code

Posted by Rik van Riel 9 months, 3 weeks ago
Add helper functions and definitions needed to use broadcast TLB
invalidation on AMD EPYC 3 and newer CPUs.

All the functions defined in invlpgb.h are used later in the series.

Compile time disabling X86_FEATURE_INVLPGB when the config
option is not set allows the compiler to omit unnecessary code.

Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/disabled-features.h |  8 +-
 arch/x86/include/asm/tlb.h               | 98 ++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index c492bdc97b05..625a89259968 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -129,6 +129,12 @@
 #define DISABLE_SEV_SNP		(1 << (X86_FEATURE_SEV_SNP & 31))
 #endif
 
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+#define DISABLE_INVLPGB		0
+#else
+#define DISABLE_INVLPGB		(1 << (X86_FEATURE_INVLPGB & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -146,7 +152,7 @@
 #define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
 			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
 #define DISABLED_MASK12	(DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13	0
+#define DISABLED_MASK13	(DISABLE_INVLPGB)
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 77f52bc1578a..91c9a4da3ace 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
 static inline void tlb_flush(struct mmu_gather *tlb);
 
 #include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
@@ -25,4 +28,99 @@ static inline void invlpg(unsigned long addr)
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 }
 
+
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond
+ * the first page, while __invlpgb gets the more human readable number of
+ * pages to invalidate.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     bool pmd_stride, u8 flags)
+{
+	u32 edx = (pcid << 16) | asid;
+	u32 ecx = (pmd_stride << 31) | (nr_pages - 1);
+	u64 rax = addr | flags;
+
+	/* The low bits in rax are for flags. Verify addr is clean. */
+	VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+	/* INVLPGB; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __tlbsync(void)
+{
+	/*
+	 * tlbsync waits for invlpgb instructions originating on the
+	 * same CPU to have completed. Print a warning if we could have
+	 * migrated, and might not be waiting on all the invlpgbs issued
+	 * during this TLB invalidation sequence.
+	 */
+	cant_migrate();
+
+	/* TLBSYNC: supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - INVLPGB_PCID:			  invalidate all TLB entries matching the PCID
+ *
+ * The first can be used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_VA			BIT(0)
+#define INVLPGB_PCID			BIT(1)
+#define INVLPGB_ASID			BIT(2)
+#define INVLPGB_INCLUDE_GLOBAL		BIT(3)
+#define INVLPGB_FINAL_ONLY		BIT(4)
+#define INVLPGB_INCLUDE_NESTED		BIT(5)
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+						unsigned long addr,
+						u16 nr,
+						bool pmd_stride)
+{
+	__invlpgb(0, pcid, addr, nr, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+	__invlpgb(0, pcid, 0, 1, 0, INVLPGB_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+	__invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL);
+	__tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+	__invlpgb(0, 0, addr, nr, 0, INVLPGB_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+	__invlpgb(0, 0, 0, 1, 0, 0);
+	__tlbsync();
+}
+
 #endif /* _ASM_X86_TLB_H */
-- 
2.47.1
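
To make the register packing in __invlpgb() concrete, here is a minimal
user-space sketch (not kernel code; the PCID and address values are made-up
illustrations) of what ends up in rax, ecx and edx:

#include <stdint.h>
#include <stdio.h>

#define INVLPGB_VA	(1ULL << 0)
#define INVLPGB_PCID	(1ULL << 1)

int main(void)
{
	unsigned long asid = 0, pcid = 5;	/* hypothetical PCID */
	unsigned long addr = 0x7f1234560000UL;	/* page-aligned VA */
	uint16_t nr_pages = 4;			/* human readable count */
	int pmd_stride = 0;			/* 4k stride */

	uint32_t edx = (pcid << 16) | asid;
	/* ECX[15:0] holds the number of *extra* pages, hence nr - 1 */
	uint32_t ecx = ((uint32_t)pmd_stride << 31) | (nr_pages - 1);
	/* the low bits of rax double as flags, so addr must be page-aligned */
	uint64_t rax = addr | INVLPGB_PCID | INVLPGB_VA;

	printf("rax=%#llx ecx=%#x edx=%#x\n",
	       (unsigned long long)rax, ecx, edx);
	return 0;
}
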
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Borislav Petkov 9 months, 3 weeks ago
On Tue, Feb 25, 2025 at 10:00:38PM -0500, Rik van Riel wrote:
> Add helper functions and definitions needed to use broadcast TLB
> invalidation on AMD EPYC 3 and newer CPUs.
> 
> All the functions defined in invlpgb.h are used later in the series.
> 
> Compile time disabling X86_FEATURE_INVLPGB when the config
> option is not set allows the compiler to omit unnecessary code.
> 
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Tested-by: Manali Shukla <Manali.Shukla@amd.com>
> Tested-by: Brendan Jackman <jackmanb@google.com>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Acked-by: Dave Hansen <dave.hansen@intel.com>
> ---
>  arch/x86/include/asm/disabled-features.h |  8 +-
>  arch/x86/include/asm/tlb.h               | 98 ++++++++++++++++++++++++
>  2 files changed, 105 insertions(+), 1 deletion(-)

My edits on top.

x86/cpu has dropped {disabled,required}-features.h in favor of a new, better
mechanism to compile-time disable X86 features, see below.

--- /tmp/current.patch	2025-02-28 20:44:40.765404608 +0100
+++ /tmp/0001-x86-mm-Add-INVLPGB-support-code.patch	2025-02-28 20:44:18.492326903 +0100
@@ -1,55 +1,38 @@
+From ce22946ea806ae459b4d88767a59b010e70682d5 Mon Sep 17 00:00:00 2001
 From: Rik van Riel <riel@surriel.com>
-Date: Tue, 25 Feb 2025 22:00:38 -0500
-Subject: x86/mm: Add INVLPGB support code
+Date: Fri, 28 Feb 2025 20:32:30 +0100
+Subject: [PATCH]  x86/mm: Add INVLPGB support code
 
 Add helper functions and definitions needed to use broadcast TLB
-invalidation on AMD EPYC 3 and newer CPUs.
+invalidation on AMD CPUs.
 
-All the functions defined in invlpgb.h are used later in the series.
-
-Compile time disabling X86_FEATURE_INVLPGB when the config
-option is not set allows the compiler to omit unnecessary code.
+  [ bp:
+      - Cleanup commit message
+      - port it to new Kconfig.cpufeatures machinery
+      - add a comment about flushing any PCID and ASID ]
 
 Signed-off-by: Rik van Riel <riel@surriel.com>
 Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
-Acked-by: Dave Hansen <dave.hansen@intel.com>
-Tested-by: Manali Shukla <Manali.Shukla@amd.com>
-Tested-by: Brendan Jackman <jackmanb@google.com>
-Tested-by: Michael Kelley <mhklinux@outlook.com>
 Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com
 ---
- arch/x86/include/asm/disabled-features.h |  8 +-
- arch/x86/include/asm/tlb.h               | 98 ++++++++++++++++++++++++
- 2 files changed, 105 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
-index c492bdc97b05..625a89259968 100644
---- a/arch/x86/include/asm/disabled-features.h
-+++ b/arch/x86/include/asm/disabled-features.h
-@@ -129,6 +129,12 @@
- #define DISABLE_SEV_SNP		(1 << (X86_FEATURE_SEV_SNP & 31))
- #endif
- 
-+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
-+#define DISABLE_INVLPGB		0
-+#else
-+#define DISABLE_INVLPGB		(1 << (X86_FEATURE_INVLPGB & 31))
-+#endif
-+
- /*
-  * Make sure to add features to the correct mask
-  */
-@@ -146,7 +152,7 @@
- #define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- 			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
- #define DISABLED_MASK12	(DISABLE_FRED|DISABLE_LAM)
--#define DISABLED_MASK13	0
-+#define DISABLED_MASK13	(DISABLE_INVLPGB)
- #define DISABLED_MASK14	0
- #define DISABLED_MASK15	0
- #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
+ arch/x86/Kconfig.cpufeatures |   4 ++
+ arch/x86/include/asm/tlb.h   | 101 +++++++++++++++++++++++++++++++++++
+ 2 files changed, 105 insertions(+)
+
+diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures
+index 5dcc49d928c5..f9af51205f07 100644
+--- a/arch/x86/Kconfig.cpufeatures
++++ b/arch/x86/Kconfig.cpufeatures
+@@ -195,3 +195,7 @@ config X86_DISABLED_FEATURE_FRED
+ config X86_DISABLED_FEATURE_SEV_SNP
+ 	def_bool y
+ 	depends on !KVM_AMD_SEV
++
++config X86_DISABLED_FEATURE_BROADCAST_TLB_FLUSH
++	def_bool y
++	depends on !X86_BROADCAST_TLB_FLUSH
 diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
-index 77f52bc1578a..91c9a4da3ace 100644
+index 77f52bc1578a..45d9c7687d61 100644
 --- a/arch/x86/include/asm/tlb.h
 +++ b/arch/x86/include/asm/tlb.h
 @@ -6,6 +6,9 @@
@@ -62,7 +45,7 @@ index 77f52bc1578a..91c9a4da3ace 100644
  
  static inline void tlb_flush(struct mmu_gather *tlb)
  {
-@@ -25,4 +28,99 @@ static inline void invlpg(unsigned long addr)
+@@ -25,4 +28,102 @@ static inline void invlpg(unsigned long addr)
  	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
  }
  
@@ -157,11 +140,14 @@ index 77f52bc1578a..91c9a4da3ace 100644
 +/* Flush all mappings for all PCIDs except globals. */
 +static inline void invlpgb_flush_all_nonglobals(void)
 +{
++	/*
++	 * @addr=0 means both rax[1] (valid PCID) and rax[2] (valid ASID) are clear
++	 * so flush *any* PCID and ASID.
++	 */
 +	__invlpgb(0, 0, 0, 1, 0, 0);
 +	__tlbsync();
 +}
-+
  #endif /* _ASM_X86_TLB_H */
 -- 

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
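
The practical effect of the Kconfig.cpufeatures entry above is that the
feature check folds to a compile-time constant when
CONFIG_X86_BROADCAST_TLB_FLUSH is off, letting the compiler discard the
INVLPGB paths. A hypothetical call site (the fallback shown is illustrative,
not taken from this series):

/*
 * Hypothetical call site: with the feature compile-time disabled,
 * cpu_feature_enabled() is constant-false and this branch is compiled out.
 */
if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
	invlpgb_flush_all_nonglobals();
else
	flush_tlb_local();	/* illustrative fallback */
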
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Dave Hansen 9 months, 2 weeks ago
On 2/28/25 11:47, Borislav Petkov wrote:
> @@ -157,11 +140,14 @@ index 77f52bc1578a..91c9a4da3ace 100644
>  +/* Flush all mappings for all PCIDs except globals. */
>  +static inline void invlpgb_flush_all_nonglobals(void)
>  +{
> ++	/*
> ++	 * @addr=0 means both rax[1] (valid PCID) and rax[2] (valid ASID) are clear
> ++	 * so flush *any* PCID and ASID.
> ++	 */
>  +	__invlpgb(0, 0, 0, 1, 0, 0);
>  +	__tlbsync();
>  +}

I had a bit of an allergic reaction to all of the magic numbers.

Could we do something like the attached where we give a _few_ of the
magic numbers some symbolic names?

For instance, instead of passing around a bool for pmd_stride, this uses
an enum. It also explicitly separates things that are setting
pmd_stride=0 but are really saying "this is a 4k stride" from things
that set pmd_stride=0 but are for operations that don't _have_ a stride.
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Dave Hansen 9 months, 2 weeks ago
Here's a plain diff if you just want to squish it in.
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Borislav Petkov 9 months, 2 weeks ago
On Mon, Mar 03, 2025 at 11:23:58AM -0800, Dave Hansen wrote:
> Here's a plain diff if you just want to squish it in.

> diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
> index 5375145eb9596..3bd617c204346 100644
> --- a/arch/x86/include/asm/tlb.h
> +++ b/arch/x86/include/asm/tlb.h
> @@ -28,6 +28,11 @@ static inline void invlpg(unsigned long addr)
>  	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
>  }
>  
> +enum invlpgb_stride {

Right, this is an address stride, as the text calls it.

> +	NO_STRIDE  = 0,
> +	PTE_STRIDE = 0,

Ok, so those are confusing. No stride is PTE stride so let's just zap
NO_STRIDE.

> +	PMD_STRIDE = 1
> +};
>  
>  /*
>   * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.

...

>  static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
>  						unsigned long addr,
>  						u16 nr,
>  						bool pmd_stride)

You're relying on the fact that true == PMD_STRIDE and false == PTE_STRIDE,
but let's make it Right(tm), see below.

Rest looks ok.

IOW, I'm merging this into patch 3:

diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 5375145eb959..6718835c3b0c 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -28,6 +28,10 @@ static inline void invlpg(unsigned long addr)
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 }
 
+enum addr_stride {
+	PTE_STRIDE = 0,
+	PMD_STRIDE = 1
+};
 
 /*
  * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
@@ -48,10 +52,10 @@ static inline void invlpg(unsigned long addr)
  */
 static inline void __invlpgb(unsigned long asid, unsigned long pcid,
 			     unsigned long addr, u16 nr_pages,
-			     bool pmd_stride, u8 flags)
+			     enum addr_stride stride, u8 flags)
 {
 	u32 edx = (pcid << 16) | asid;
-	u32 ecx = (pmd_stride << 31) | (nr_pages - 1);
+	u32 ecx = (stride << 31) | (nr_pages - 1);
 	u64 rax = addr | flags;
 
 	/* The low bits in rax are for flags. Verify addr is clean. */
@@ -78,33 +82,38 @@ static inline void __tlbsync(void)
 /*
  * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
  * of the three. For example:
- * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
- * - INVLPGB_PCID:			  invalidate all TLB entries matching the PCID
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID:			    invalidate all TLB entries matching the PCID
  *
- * The first can be used to invalidate (kernel) mappings at a particular
+ * The first is used to invalidate (kernel) mappings at a particular
  * address across all processes.
  *
  * The latter invalidates all TLB entries matching a PCID.
  */
-#define INVLPGB_VA			BIT(0)
-#define INVLPGB_PCID			BIT(1)
-#define INVLPGB_ASID			BIT(2)
-#define INVLPGB_INCLUDE_GLOBAL		BIT(3)
-#define INVLPGB_FINAL_ONLY		BIT(4)
-#define INVLPGB_INCLUDE_NESTED		BIT(5)
+#define INVLPGB_FLAG_VA			BIT(0)
+#define INVLPGB_FLAG_PCID		BIT(1)
+#define INVLPGB_FLAG_ASID		BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL	BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY		BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED	BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS	0UL
 
 static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
 						unsigned long addr,
-						u16 nr,
-						bool pmd_stride)
+						u16 nr, bool stride)
 {
-	__invlpgb(0, pcid, addr, nr, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+	enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+	u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+	__invlpgb(0, pcid, addr, nr, str, flags);
 }
 
 /* Flush all mappings for a given PCID, not including globals. */
 static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
 {
-	__invlpgb(0, pcid, 0, 1, 0, INVLPGB_PCID);
+	__invlpgb(0, pcid, 0, 1, PTE_STRIDE, INVLPGB_FLAG_PCID);
 }
 
 /* Flush all mappings, including globals, for all PCIDs. */
@@ -117,21 +126,21 @@ static inline void invlpgb_flush_all(void)
 	 * as it is cheaper.
 	 */
 	guard(preempt)();
-	__invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL);
+	__invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
 	__tlbsync();
 }
 
 /* Flush addr, including globals, for all PCIDs. */
 static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
 {
-	__invlpgb(0, 0, addr, nr, 0, INVLPGB_INCLUDE_GLOBAL);
+	__invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
 }
 
 /* Flush all mappings for all PCIDs except globals. */
 static inline void invlpgb_flush_all_nonglobals(void)
 {
 	guard(preempt)();
-	__invlpgb(0, 0, 0, 1, 0, 0);
+	__invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_MODE_ALL_NONGLOBALS);
 	__tlbsync();
 }
 #endif /* _ASM_X86_TLB_H */

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Dave Hansen 9 months, 2 weeks ago
On 3/4/25 03:00, Borislav Petkov wrote:
> On Mon, Mar 03, 2025 at 11:23:58AM -0800, Dave Hansen wrote:
>> Here's a plain diff if you just want to squish it in.
> 
>> diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
>> index 5375145eb9596..3bd617c204346 100644
>> --- a/arch/x86/include/asm/tlb.h
>> +++ b/arch/x86/include/asm/tlb.h
>> @@ -28,6 +28,11 @@ static inline void invlpg(unsigned long addr)
>>  	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
>>  }
>>  
>> +enum invlpgb_stride {
> 
> Right, this is an address stride, as the text calls it.
> 
>> +	NO_STRIDE  = 0,
>> +	PTE_STRIDE = 0,
> 
> Ok, so those are confusing. No stride is PTE stride so let's just zap
> NO_STRIDE.

Passing "PTE_STRIDE" to an operation that doesn't have a stride is
pretty confusing too.

...
>  /* Flush all mappings, including globals, for all PCIDs. */
> @@ -117,21 +126,21 @@ static inline void invlpgb_flush_all(void)
>  	 * as it is cheaper.
>  	 */
>  	guard(preempt)();
> -	__invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL);
> +	__invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
>  	__tlbsync();
>  }

This one, for example. It's not flushing PTEs and doesn't have a start
address or nr>0.

So, we could have the enum be totally divorced from the hardware type:

	NO_STRIDE,
	PTE_STRIDE,
	PMD_STRIDE

and decode it at the end:

	if (stride == PMD_STRIDE)
		foo | PMD_STRIDE_BIT;
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Borislav Petkov 9 months, 2 weeks ago
On Tue, Mar 04, 2025 at 07:10:13AM -0800, Dave Hansen wrote:
> So, we could have the enum be totally divorced from the hardware type:
> 
> 	NO_STRIDE,
> 	PTE_STRIDE,
> 	PMD_STRIDE

How about we completely hide that NO_STRIDE thing and do a __invlpgb_all()
"sub-helper" which is basically telling you it is invalidating all kinds of
TLB entries and stride does not apply there:

---

diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index e8561a846754..361b3dde2656 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -66,6 +66,12 @@ static inline void __invlpgb(unsigned long asid, unsigned long pcid,
 	asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
 }
 
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid,
+				 unsigned long addr, u16 nr_pages, u8 flags)
+{
+	__invlpgb(asid, pcid, addr, nr_pages, 0, flags);
+}
+
 static inline void __tlbsync(void)
 {
 	/*
@@ -84,6 +90,8 @@ static inline void __tlbsync(void)
 static inline void __invlpgb(unsigned long asid, unsigned long pcid,
 			     unsigned long addr, u16 nr_pages,
 			     enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid,
+				 unsigned long addr, u16 nr_pages, u8 flags) { }
 static inline void __tlbsync(void) { }
 #endif
 
@@ -121,7 +129,7 @@ static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
 /* Flush all mappings for a given PCID, not including globals. */
 static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
 {
-	__invlpgb(0, pcid, 0, 1, PTE_STRIDE, INVLPGB_FLAG_PCID);
+	__invlpgb_all(0, pcid, 0, 1, INVLPGB_FLAG_PCID);
 }
 
 /* Flush all mappings, including globals, for all PCIDs. */
@@ -134,7 +142,7 @@ static inline void invlpgb_flush_all(void)
 	 * as it is cheaper.
 	 */
 	guard(preempt)();
-	__invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+	__invlpgb_all(0, 0, 0, 1, INVLPGB_FLAG_INCLUDE_GLOBAL);
 	__tlbsync();
 }
 
@@ -148,7 +156,7 @@ static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
 static inline void invlpgb_flush_all_nonglobals(void)
 {
 	guard(preempt)();
-	__invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_MODE_ALL_NONGLOBALS);
+	__invlpgb_all(0, 0, 0, 1, INVLPGB_MODE_ALL_NONGLOBALS);
 	__tlbsync();
 }
 #endif /* _ASM_X86_TLB_H */

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Dave Hansen 9 months, 2 weeks ago
On 3/4/25 08:19, Borislav Petkov wrote:
> +static inline void __invlpgb_all(unsigned long asid, unsigned long pcid,
> +				 unsigned long addr, u16 nr_pages, u8 flags)
> +{
> +	__invlpgb(asid, pcid, addr, nr_pages, 0, flags);
> +}

Why would __invlpgb_all() need an 'addr' or 'nr_pages'? Shouldn't those be 0?

It's _better_ of course when it happens at a single site and it's close
to a prototype for __invlpgb(). But it's still a magic '0' that it's
impossible to make sense of without looking at the prototype.

Looking at the APM again... there really are three possible values for
ECX[31]:

 0: increment by 4k
 1: increment by 2M
 X: Don't care, no increment is going to happen

What you wrote above could actually be written:

	__invlpgb(asid, pcid, addr, nr_pages, 1, flags);

so the 0/1 is _actually_ completely random and arbitrary as far as the
spec goes.

Why does it matter?

It enables you to do sanity checking. For example, we could actually
enforce a rule that "no stride" can't be paired with any of the
per-address invalidation characteristics:

	if (stride == NO_STRIDE) {
		WARN_ON(flags & INVLPGB_FLAG_VA);
		WARN_ON(addr);
		WARN_ON(nr_pages);
	}

That's impossible if you pass a 'bool' in.

But, honestly, I'm deep into nitpick mode here. I think differentiating
the three cases is worth it, but it's also not the hill I'm going to die
on. ;)
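
A compact sketch of the separation Dave is describing (the NO_STRIDE decode
helper below is illustrative and not the code that was eventually merged):
the enum stays divorced from the hardware encoding, and it is decoded and
sanity-checked in exactly one place:

enum addr_stride { NO_STRIDE, PTE_STRIDE, PMD_STRIDE };

/*
 * Hypothetical decode helper: ECX[31] only matters when an address range
 * is actually walked, so NO_STRIDE leaves it clear and must not be paired
 * with per-address arguments.
 */
static inline u32 stride_to_ecx31(enum addr_stride stride, unsigned long addr,
				  u16 nr_pages, u8 flags)
{
	if (stride == NO_STRIDE)
		WARN_ON_ONCE((flags & INVLPGB_FLAG_VA) || addr || nr_pages);

	return (stride == PMD_STRIDE) ? BIT(31) : 0;
}
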
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Borislav Petkov 9 months, 2 weeks ago
On Tue, Mar 04, 2025 at 08:57:30AM -0800, Dave Hansen wrote:
> Why would __invlpgb_all() need an 'addr' or 'nr_pages'? Shouldn't those be 0?

Yap, good idea. It makes the _all helper even better:

static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
{
        __invlpgb(asid, pcid, 0, 1, 0, flags);
}

> It's _better_ of course when it happens at a single site and it's close
> to a prototype for __invlpgb(). But it's still a magic '0' that it's
> impossible to make sense of without looking at the prototype.

Yes.

> Looking at the APM again... there really are three possible values for
> ECX[31]:
> 
>  0: increment by 4k
>  1: increment by 2M
>  X: Don't care, no increment is going to happen
> 
> What you wrote above could actually be written:
> 
> 	__invlpgb(asid, pcid, addr, nr_pages, 1, flags);
> 
> so the 0/1 is _actually_ completely random and arbitrary as far as the
> spec goes.

Yes.

> Why does it matter?
> 
> It enables you to do sanity checking. For example, we could actually
> enforce a rule that "no stride" can't be paired with any of the
> per-address invalidation characteristics:
> 
> 	if (stride == NO_STRIDE) {
> 		WARN_ON(flags & INVLPGB_FLAG_VA);
> 		WARN_ON(addr);
> 		WARN_ON(nr_pages);
> 	}
> 
> That's impossible if you pass a 'bool' in.
> 
> But, honestly, I'm deep into nitpick mode here. I think differentiating
> the three cases is worth it, but it's also not the hill I'm going to die
> on. ;)

Yap, and now I've massaged it so much that it doesn't really need that
checking. Because I have exactly two calls which use the stride:

1.

static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
                                                  unsigned long addr,
                                                  u16 nr, bool stride)
{
        enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
        u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;

        __invlpgb(0, pcid, addr, nr, str, flags);
}

This one is fine - I verify it.

2.

/* Flush addr, including globals, for all PCIDs. */
static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
	__invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
}

This one controls it already.

So the only case where something could go bad is when one would use
__invlpgb() directly and that should hopefully be caught early enough.

But if you really want, I could add sanitization to __invlpgb() to massage it
into the right stride. And print a single warning - the big fat WARN* in an
inline functions are probably too much. Hm, I dunno...

Current diff on top:

diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index e8561a846754..8ab21487d6ee 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -66,6 +66,11 @@ static inline void __invlpgb(unsigned long asid, unsigned long pcid,
 	asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
 }
 
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+	__invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
 static inline void __tlbsync(void)
 {
 	/*
@@ -84,6 +89,7 @@ static inline void __tlbsync(void)
 static inline void __invlpgb(unsigned long asid, unsigned long pcid,
 			     unsigned long addr, u16 nr_pages,
 			     enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
 static inline void __tlbsync(void) { }
 #endif
 
@@ -121,7 +127,7 @@ static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
 /* Flush all mappings for a given PCID, not including globals. */
 static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
 {
-	__invlpgb(0, pcid, 0, 1, PTE_STRIDE, INVLPGB_FLAG_PCID);
+	__invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
 }
 
 /* Flush all mappings, including globals, for all PCIDs. */
@@ -134,7 +140,7 @@ static inline void invlpgb_flush_all(void)
 	 * as it is cheaper.
 	 */
 	guard(preempt)();
-	__invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+	__invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
 	__tlbsync();
 }
 
@@ -148,7 +154,7 @@ static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
 static inline void invlpgb_flush_all_nonglobals(void)
 {
 	guard(preempt)();
-	__invlpgb(0, 0, 0, 1, PTE_STRIDE, INVLPGB_MODE_ALL_NONGLOBALS);
+	__invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
 	__tlbsync();
 }
 #endif /* _ASM_X86_TLB_H */

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Dave Hansen 9 months, 3 weeks ago
On 2/25/25 19:00, Rik van Riel wrote:
> Add helper functions and definitions needed to use broadcast TLB
> invalidation on AMD EPYC 3 and newer CPUs.

I don't know if I mentioned it earlier, but I'd leave this explanation
of where the feature shows up for the cover letter or the Documentation/.
Re: [PATCH v14 03/13] x86/mm: add INVLPGB support code
Posted by Borislav Petkov 9 months, 3 weeks ago
On Tue, Feb 25, 2025 at 10:00:38PM -0500, Rik van Riel wrote:
> Add helper functions and definitions needed to use broadcast TLB
> invalidation on AMD EPYC 3 and newer CPUs.
> 
> All the functions defined in invlpgb.h are used later in the series.

Uff, that's tlb.h now. As already said. :-\

Btw, this is why there's no point in writing *what* the patch does - that is
visible from the diff itself. This sentence is simply not needed.

> Compile time disabling X86_FEATURE_INVLPGB when the config
> option is not set allows the compiler to omit unnecessary code.
> 
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Tested-by: Manali Shukla <Manali.Shukla@amd.com>
> Tested-by: Brendan Jackman <jackmanb@google.com>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Acked-by: Dave Hansen <dave.hansen@intel.com>

And I asked you already but still crickets:

What do those Tested-by tags mean if you keep changing the patches?!

https://lore.kernel.org/r/20250224123142.GFZ7xmruuyrc2Wy0r7@fat_crate.local

...

IOW, you need to drop those tags.

> +/* Flush all mappings for all PCIDs except globals. */

This comment should state that addr=0 means both rax[1] (valid PCID) and
rax[2] (valid ASID) are clear and this means: flush *any* PCID and ASID. So
that it is clear.

> +static inline void invlpgb_flush_all_nonglobals(void)
> +{
> +	__invlpgb(0, 0, 0, 1, 0, 0);
> +	__tlbsync();
> +}
> +
>  #endif /* _ASM_X86_TLB_H */
> -- 

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
[tip: x86/core] x86/mm: Add INVLPGB support code
Posted by tip-bot2 for Rik van Riel 9 months ago
The following commit has been merged into the x86/core branch of tip:

Commit-ID:     b7aa05cbdc52d61119b0e736bb3e288735f860fe
Gitweb:        https://git.kernel.org/tip/b7aa05cbdc52d61119b0e736bb3e288735f860fe
Author:        Rik van Riel <riel@surriel.com>
AuthorDate:    Fri, 28 Feb 2025 20:32:30 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 19 Mar 2025 11:12:25 +01:00

x86/mm: Add INVLPGB support code

Add helper functions and definitions needed to use broadcast TLB
invalidation on AMD CPUs.

  [ bp:
      - Cleanup commit message
      - Improve and expand comments
      - push the preemption guards inside the invlpgb* helpers
      - merge improvements from dhansen
      - add !CONFIG_BROADCAST_TLB_FLUSH function stubs because Clang
	can't do DCE properly yet and looks at the inline asm and
	complains about it getting a u64 argument on 32-bit code ]

Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com
---
 arch/x86/include/asm/tlb.h | 132 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 132 insertions(+)

diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 77f52bc..31f6db4 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
 static inline void tlb_flush(struct mmu_gather *tlb);
 
 #include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
@@ -25,4 +28,133 @@ static inline void invlpg(unsigned long addr)
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 }
 
+enum addr_stride {
+	PTE_STRIDE = 0,
+	PMD_STRIDE = 1
+};
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond
+ * the first page, while __invlpgb gets the more human readable number of
+ * pages to invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If none of them are set, *any*
+ * address in the specified range matches.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     enum addr_stride stride, u8 flags)
+{
+	u32 edx = (pcid << 16) | asid;
+	u32 ecx = (stride << 31) | (nr_pages - 1);
+	u64 rax = addr | flags;
+
+	/* The low bits in rax are for flags. Verify addr is clean. */
+	VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+	/* INVLPGB; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+	__invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
+static inline void __tlbsync(void)
+{
+	/*
+	 * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+	 * to have completed. Print a warning if the task has been migrated,
+	 * and might not be waiting on all the INVLPGBs issued during this TLB
+	 * invalidation sequence.
+	 */
+	cant_migrate();
+
+	/* TLBSYNC: supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID:			    invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA			BIT(0)
+#define INVLPGB_FLAG_PCID		BIT(1)
+#define INVLPGB_FLAG_ASID		BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL	BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY		BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED	BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS	0UL
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+						unsigned long addr,
+						u16 nr, bool stride)
+{
+	enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+	u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+	__invlpgb(0, pcid, addr, nr, str, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+	__invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+	/*
+	 * TLBSYNC at the end needs to make sure all flushes done on the
+	 * current CPU have been executed system-wide. Therefore, make
+	 * sure nothing gets migrated in-between but disable preemption
+	 * as it is cheaper.
+	 */
+	guard(preempt)();
+	__invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
+	__tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+	__invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+	guard(preempt)();
+	__invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
+	__tlbsync();
+}
 #endif /* _ASM_X86_TLB_H */
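
As merged, the synchronous helpers take the preemption guard internally,
while the *_nosync() variants leave both the guard and the final TLBSYNC to
the caller, so several broadcast invalidations can be batched under a single
wait. A hypothetical caller (flush_two_pcids() is made up for illustration):

/* Hypothetical batching example; not part of the merged patch. */
static void flush_two_pcids(unsigned long pcid_a, unsigned long pcid_b)
{
	/*
	 * TLBSYNC only waits for INVLPGBs issued from this CPU, so the
	 * whole sequence must stay on one CPU.
	 */
	guard(preempt)();
	invlpgb_flush_single_pcid_nosync(pcid_a);
	invlpgb_flush_single_pcid_nosync(pcid_b);
	__tlbsync();
}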