Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
---
arch/x86/mm/tlb.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dbcb5c968ff9..f44a03bca41c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1077,6 +1077,18 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
+{
+ unsigned long addr, nr;
+
+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+ nr = (info->end - addr) >> PAGE_SHIFT;
+ nr = clamp_val(nr, 1, invlpgb_count_max);
+ invlpgb_flush_addr_nosync(addr, nr);
+ }
+ __tlbsync();
+}
+
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
@@ -1087,6 +1099,22 @@ static void do_kernel_range_flush(void *info)
flush_tlb_one_kernel(addr);
}
+static void kernel_tlb_flush_all(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+static void kernel_tlb_flush_range(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_kernel_range_flush(info);
+ else
+ on_each_cpu(do_kernel_range_flush, info, 1);
+}
+
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
struct flush_tlb_info *info;
@@ -1097,9 +1125,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
TLB_GENERATION_INVALID);
if (info->end == TLB_FLUSH_ALL)
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+ kernel_tlb_flush_all(info);
else
- on_each_cpu(do_kernel_range_flush, info, 1);
+ kernel_tlb_flush_range(info);
put_flush_tlb_info();
}
--
2.47.1
On Tue, Feb 25, 2025 at 10:00:39PM -0500, Rik van Riel wrote:
> Use broadcast TLB invalidation for kernel addresses when available.
>
> Remove the need to send IPIs for kernel TLB flushes.
>
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Tested-by: Manali Shukla <Manali.Shukla@amd.com>
> Tested-by: Brendan Jackman <jackmanb@google.com>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> ---
> arch/x86/mm/tlb.c | 32 ++++++++++++++++++++++++++++++--
> 1 file changed, 30 insertions(+), 2 deletions(-)
Changes on top:
--- /tmp/current.patch 2025-02-28 22:39:33.236465716 +0100
+++ /tmp/0001-x86-mm-Use-INVLPGB-for-kernel-TLB-flushes.patch 2025-02-28 22:39:59.432310072 +0100
@@ -1,36 +1,43 @@
+From b97ae5e31069cd536b563df185de52d33a565077 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:39 -0500
-Subject: x86/mm: Use INVLPGB for kernel TLB flushes
+Subject: [PATCH] x86/mm: Use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
-
Remove the need to send IPIs for kernel TLB flushes.
+ [ bp: Integrate dhansen's comments additions. ]
+
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
-Tested-by: Manali Shukla <Manali.Shukla@amd.com>
-Tested-by: Brendan Jackman <jackmanb@google.com>
-Tested-by: Michael Kelley <mhklinux@outlook.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
Link: https://lore.kernel.org/r/20250226030129.530345-5-riel@surriel.com
---
- arch/x86/mm/tlb.c | 32 ++++++++++++++++++++++++++++++--
- 1 file changed, 30 insertions(+), 2 deletions(-)
+ arch/x86/mm/tlb.c | 39 +++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index dbcb5c968ff9..f44a03bca41c 100644
+index dbcb5c968ff9..5c44b94ad5af 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1077,6 +1077,18 @@ void flush_tlb_all(void)
+@@ -1077,6 +1077,25 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
++/* Flush an arbitrarily large range of memory with INVLPGB. */
+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
+{
+ unsigned long addr, nr;
+
+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+ nr = (info->end - addr) >> PAGE_SHIFT;
++
++ /*
++ * INVLPGB has a limit on the size of ranges it can
++ * flush. Break up large flushes.
++ */
+ nr = clamp_val(nr, 1, invlpgb_count_max);
++
+ invlpgb_flush_addr_nosync(addr, nr);
+ }
+ __tlbsync();
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 2/25/25 19:00, Rik van Riel wrote:
> Use broadcast TLB invalidation for kernel addresses when available.
>
> Remove the need to send IPIs for kernel TLB flushes.
Nit: the changelog doesn't address the refactoring.
*Ideally*, you'd create the helpers and move the code there in one patch
and then actually "use INVLPGB for kernel TLB flushes" in the next. It's
compact enough here that it's not a deal breaker.
> +static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
> +{
> + unsigned long addr, nr;
> +
> + for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
> + nr = (info->end - addr) >> PAGE_SHIFT;
> + nr = clamp_val(nr, 1, invlpgb_count_max);
> + invlpgb_flush_addr_nosync(addr, nr);
> + }
> + __tlbsync();
> +}
This needs a comment or two explaining that the function can take large
sizes:
/*
* Flush an arbitrarily large range of memory with INVLPGB
*/
But the fact that the _instruction_ cannot is just as important. This
would be great in the loop, just above the clamp:
/*
* INVLPGB has a limit on the size of ranges
* it can flush. Break large flushes up.
*/
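For what it's worth, here is a minimal user-space sketch of that chunking
arithmetic, just to make the clamp's effect concrete. It is illustrative
only, not kernel code: the invlpgb_count_max value (8) and PAGE_SHIFT are
made-up stand-ins for the real CPUID-derived limit and the kernel
constant, and the actual invlpgb_*/tlbsync calls are replaced by printf:

/*
 * Standalone sketch of the chunking loop only -- not kernel code.
 * invlpgb_count_max below is a hypothetical hardware limit.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
static const unsigned long invlpgb_count_max = 8;	/* hypothetical */

static void show_chunks(unsigned long start, unsigned long end)
{
	unsigned long addr, nr;

	for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
		nr = (end - addr) >> PAGE_SHIFT;
		/* same effect as clamp_val(nr, 1, invlpgb_count_max) */
		if (nr < 1)
			nr = 1;
		if (nr > invlpgb_count_max)
			nr = invlpgb_count_max;
		printf("INVLPGB addr=%#lx nr=%lu pages\n", addr, nr);
	}
}

int main(void)
{
	/* 20 pages with a limit of 8 -> chunks of 8, 8 and 4 */
	unsigned long base = 0xffffc90000000000UL;

	show_chunks(base, base + (20UL << PAGE_SHIFT));
	return 0;
}

With a 20-page range and an 8-page limit it issues three flushes (8, 8,
then 4 pages), which is exactly why the clamp needs the comment.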
> static void do_kernel_range_flush(void *info)
> {
> struct flush_tlb_info *f = info;
> @@ -1087,6 +1099,22 @@ static void do_kernel_range_flush(void *info)
> flush_tlb_one_kernel(addr);
> }
>
> +static void kernel_tlb_flush_all(struct flush_tlb_info *info)
> +{
> + if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
> + invlpgb_flush_all();
> + else
> + on_each_cpu(do_flush_tlb_all, NULL, 1);
> +}
> +
> +static void kernel_tlb_flush_range(struct flush_tlb_info *info)
> +{
> + if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
> + invlpgb_kernel_range_flush(info);
> + else
> + on_each_cpu(do_kernel_range_flush, info, 1);
> +}
> +
> void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> {
> struct flush_tlb_info *info;
> @@ -1097,9 +1125,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> TLB_GENERATION_INVALID);
>
> if (info->end == TLB_FLUSH_ALL)
> - on_each_cpu(do_flush_tlb_all, NULL, 1);
> + kernel_tlb_flush_all(info);
> else
> - on_each_cpu(do_kernel_range_flush, info, 1);
> + kernel_tlb_flush_range(info);
>
> put_flush_tlb_info();
> }
But the structure of this code is much better than previous versions.
With the comments fixed:
Acked-by: Dave Hansen <dave.hansen@intel.com>
The following commit has been merged into the x86/core branch of tip:
Commit-ID: 82378c6c2f435dba66145609de16bf44a9de6303
Gitweb: https://git.kernel.org/tip/82378c6c2f435dba66145609de16bf44a9de6303
Author: Rik van Riel <riel@surriel.com>
AuthorDate: Tue, 25 Feb 2025 22:00:39 -05:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 19 Mar 2025 11:12:29 +01:00
x86/mm: Use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
[ bp: Integrate dhansen's comments additions, merge the
flush_tlb_all() change into this one too. ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20250226030129.530345-5-riel@surriel.com
---
arch/x86/mm/tlb.c | 48 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 4 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dbcb5c9..8cd084b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1064,7 +1064,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
-
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -1074,7 +1073,32 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+
+ /* First try (faster) hardware-assisted TLB invalidation. */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ /* Fall back to the IPI-based invalidation. */
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+/* Flush an arbitrarily large range of memory with INVLPGB. */
+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
+{
+ unsigned long addr, nr;
+
+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+ nr = (info->end - addr) >> PAGE_SHIFT;
+
+ /*
+ * INVLPGB has a limit on the size of ranges it can
+ * flush. Break up large flushes.
+ */
+ nr = clamp_val(nr, 1, invlpgb_count_max);
+
+ invlpgb_flush_addr_nosync(addr, nr);
+ }
+ __tlbsync();
}
static void do_kernel_range_flush(void *info)
@@ -1087,6 +1111,22 @@ static void do_kernel_range_flush(void *info)
flush_tlb_one_kernel(addr);
}
+static void kernel_tlb_flush_all(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+static void kernel_tlb_flush_range(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_kernel_range_flush(info);
+ else
+ on_each_cpu(do_kernel_range_flush, info, 1);
+}
+
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
struct flush_tlb_info *info;
@@ -1097,9 +1137,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
TLB_GENERATION_INVALID);
if (info->end == TLB_FLUSH_ALL)
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+ kernel_tlb_flush_all(info);
else
- on_each_cpu(do_kernel_range_flush, info, 1);
+ kernel_tlb_flush_range(info);
put_flush_tlb_info();
}
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: ccc19c694b0fe063a90dd27470e9f4ba22990ea1
Gitweb: https://git.kernel.org/tip/ccc19c694b0fe063a90dd27470e9f4ba22990ea1
Author: Rik van Riel <riel@surriel.com>
AuthorDate: Tue, 25 Feb 2025 22:00:39 -05:00
Committer: Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Wed, 05 Mar 2025 17:19:52 +01:00
x86/mm: Use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
[ bp: Integrate dhansen's comments additions, merge the
flush_tlb_all() change into this one too. ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-5-riel@surriel.com
---
arch/x86/mm/tlb.c | 48 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 4 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dbcb5c9..8cd084b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1064,7 +1064,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
-
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -1074,7 +1073,32 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+
+ /* First try (faster) hardware-assisted TLB invalidation. */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ /* Fall back to the IPI-based invalidation. */
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+/* Flush an arbitrarily large range of memory with INVLPGB. */
+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
+{
+ unsigned long addr, nr;
+
+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+ nr = (info->end - addr) >> PAGE_SHIFT;
+
+ /*
+ * INVLPGB has a limit on the size of ranges it can
+ * flush. Break up large flushes.
+ */
+ nr = clamp_val(nr, 1, invlpgb_count_max);
+
+ invlpgb_flush_addr_nosync(addr, nr);
+ }
+ __tlbsync();
}
static void do_kernel_range_flush(void *info)
@@ -1087,6 +1111,22 @@ static void do_kernel_range_flush(void *info)
flush_tlb_one_kernel(addr);
}
+static void kernel_tlb_flush_all(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+static void kernel_tlb_flush_range(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_kernel_range_flush(info);
+ else
+ on_each_cpu(do_kernel_range_flush, info, 1);
+}
+
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
struct flush_tlb_info *info;
@@ -1097,9 +1137,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
TLB_GENERATION_INVALID);
if (info->end == TLB_FLUSH_ALL)
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+ kernel_tlb_flush_all(info);
else
- on_each_cpu(do_kernel_range_flush, info, 1);
+ kernel_tlb_flush_range(info);
put_flush_tlb_info();
}