arch/x86/mm/tlb.c | 63 +++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 29 deletions(-)
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Have it return the two things it does return:
- a new ASID and
- the need to flush the TLB or not,
in a struct which fits in a single 32-bit register and whack the IO
parameters.
No functional changes.
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
---
arch/x86/mm/tlb.c | 63 +++++++++++++++++++++++++----------------------
1 file changed, 34 insertions(+), 29 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e459d97ef397..d00ae21d0ee2 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -215,16 +215,20 @@ static void clear_asid_other(void)
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+struct new_asid {
+ unsigned int asid : 16;
+ unsigned int need_flush : 1;
+};
-static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
- u16 *new_asid, bool *need_flush)
+static struct new_asid choose_new_asid(struct mm_struct *next, u64 next_tlb_gen)
{
+ struct new_asid ns;
u16 asid;
if (!static_cpu_has(X86_FEATURE_PCID)) {
- *new_asid = 0;
- *need_flush = true;
- return;
+ ns.asid = 0;
+ ns.need_flush = 1;
+ return ns;
}
/*
@@ -235,9 +239,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 global_asid = mm_global_asid(next);
if (global_asid) {
- *new_asid = global_asid;
- *need_flush = false;
- return;
+ ns.asid = global_asid;
+ ns.need_flush = 0;
+ return ns;
}
}
@@ -249,22 +253,23 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
next->context.ctx_id)
continue;
- *new_asid = asid;
- *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
- next_tlb_gen);
- return;
+ ns.asid = asid;
+ ns.need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < next_tlb_gen);
+ return ns;
}
/*
* We don't currently own an ASID slot on this CPU.
* Allocate a slot.
*/
- *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
- if (*new_asid >= TLB_NR_DYN_ASIDS) {
- *new_asid = 0;
+ ns.asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
+ if (ns.asid >= TLB_NR_DYN_ASIDS) {
+ ns.asid = 0;
this_cpu_write(cpu_tlbstate.next_asid, 1);
}
- *need_flush = true;
+ ns.need_flush = true;
+
+ return ns;
}
/*
@@ -781,9 +786,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
unsigned long new_lam;
+ struct new_asid ns;
u64 next_tlb_gen;
- bool need_flush;
- u16 new_asid;
+
/* We don't want flush_tlb_func() to run concurrently with us. */
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
@@ -854,7 +859,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
/* Check if the current mm is transitioning to a global ASID */
if (mm_needs_global_asid(next, prev_asid)) {
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
goto reload_tlb;
}
@@ -889,8 +894,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
* TLB contents went out of date while we were in lazy
* mode. Fall through to the TLB switching code below.
*/
- new_asid = prev_asid;
- need_flush = true;
+ ns.asid = prev_asid;
+ ns.need_flush = true;
} else {
/*
* Apply process to process speculation vulnerability
@@ -918,21 +923,21 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
}
reload_tlb:
new_lam = mm_lam_cr3_mask(next);
- if (need_flush) {
- VM_WARN_ON_ONCE(is_global_asid(new_asid));
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
+ if (ns.need_flush) {
+ VM_WARN_ON_ONCE(is_global_asid(ns.asid));
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -941,7 +946,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
barrier();
this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid);
cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next));
if (next != prev) {
--
2.43.0
* Borislav Petkov <bp@kernel.org> wrote: > From: "Borislav Petkov (AMD)" <bp@alien8.de> > > Have it return the two things it does return: > > - a new ASID and > - the need to flush the TLB or not, > > in a struct which fits in a single 32-bit register and whack the IO > parameters. > > No functional changes. > > Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> > --- > arch/x86/mm/tlb.c | 63 +++++++++++++++++++++++++---------------------- > 1 file changed, 34 insertions(+), 29 deletions(-) > > diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c > index e459d97ef397..d00ae21d0ee2 100644 > --- a/arch/x86/mm/tlb.c > +++ b/arch/x86/mm/tlb.c > @@ -215,16 +215,20 @@ static void clear_asid_other(void) > > atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); > > +struct new_asid { > + unsigned int asid : 16; > + unsigned int need_flush : 1; > +}; > > -static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, > - u16 *new_asid, bool *need_flush) > +static struct new_asid choose_new_asid(struct mm_struct *next, u64 next_tlb_gen) Nice!! Note how the cleaned up return signature not only makes the code easier to read, but also helps the compiler generate better code: # arch/x86/mm/tlb.o: text data bss dec hex filename 9341 753 516 10610 2972 tlb.o.before 9213 753 516 10482 28f2 tlb.o.after Personally I also like the non-bool new_asid::need_flush easier to read in this 'HW interface' context. Thanks, Ingo
On Thu, Apr 03, 2025 at 01:36:20PM +0200, Ingo Molnar wrote: > Note how the cleaned up return signature not only makes the code easier > to read, but also helps the compiler generate better code: Yap, and the psABI guarantees that struct is returned in a u32 reg so the asm looks pretty good. > # arch/x86/mm/tlb.o: > > text data bss dec hex filename > 9341 753 516 10610 2972 tlb.o.before > 9213 753 516 10482 28f2 tlb.o.after > > Personally I also like the non-bool new_asid::need_flush easier to read > in this 'HW interface' context. Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: 2fb34b1566a386913b291d04f91ba6f6e6a5bb99
Gitweb: https://git.kernel.org/tip/2fb34b1566a386913b291d04f91ba6f6e6a5bb99
Author: Borislav Petkov (AMD) <bp@alien8.de>
AuthorDate: Thu, 03 Apr 2025 10:56:23 +02:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 03 Apr 2025 13:35:37 +02:00
x86/tlb: Simplify choose_new_asid() and generate better code
Have it return the two things it does return:
- a new ASID and
- the need to flush the TLB or not,
in a struct which fits in a single 32-bit register and whack the IO
parameters.
Beyond being easier to read, this also helps the compiler generate
better, more compact code:
# arch/x86/mm/tlb.o:
text data bss dec hex filename
9341 753 516 10610 2972 tlb.o.before
9213 753 516 10482 28f2 tlb.o.after
No functional changes.
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250403085623.20824-1-bp@kernel.org
---
arch/x86/mm/tlb.c | 63 ++++++++++++++++++++++++----------------------
1 file changed, 34 insertions(+), 29 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e459d97..d00ae21 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -215,16 +215,20 @@ static void clear_asid_other(void)
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+struct new_asid {
+ unsigned int asid : 16;
+ unsigned int need_flush : 1;
+};
-static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
- u16 *new_asid, bool *need_flush)
+static struct new_asid choose_new_asid(struct mm_struct *next, u64 next_tlb_gen)
{
+ struct new_asid ns;
u16 asid;
if (!static_cpu_has(X86_FEATURE_PCID)) {
- *new_asid = 0;
- *need_flush = true;
- return;
+ ns.asid = 0;
+ ns.need_flush = 1;
+ return ns;
}
/*
@@ -235,9 +239,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 global_asid = mm_global_asid(next);
if (global_asid) {
- *new_asid = global_asid;
- *need_flush = false;
- return;
+ ns.asid = global_asid;
+ ns.need_flush = 0;
+ return ns;
}
}
@@ -249,22 +253,23 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
next->context.ctx_id)
continue;
- *new_asid = asid;
- *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
- next_tlb_gen);
- return;
+ ns.asid = asid;
+ ns.need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < next_tlb_gen);
+ return ns;
}
/*
* We don't currently own an ASID slot on this CPU.
* Allocate a slot.
*/
- *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
- if (*new_asid >= TLB_NR_DYN_ASIDS) {
- *new_asid = 0;
+ ns.asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
+ if (ns.asid >= TLB_NR_DYN_ASIDS) {
+ ns.asid = 0;
this_cpu_write(cpu_tlbstate.next_asid, 1);
}
- *need_flush = true;
+ ns.need_flush = true;
+
+ return ns;
}
/*
@@ -781,9 +786,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
unsigned long new_lam;
+ struct new_asid ns;
u64 next_tlb_gen;
- bool need_flush;
- u16 new_asid;
+
/* We don't want flush_tlb_func() to run concurrently with us. */
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
@@ -854,7 +859,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
/* Check if the current mm is transitioning to a global ASID */
if (mm_needs_global_asid(next, prev_asid)) {
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
goto reload_tlb;
}
@@ -889,8 +894,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
* TLB contents went out of date while we were in lazy
* mode. Fall through to the TLB switching code below.
*/
- new_asid = prev_asid;
- need_flush = true;
+ ns.asid = prev_asid;
+ ns.need_flush = true;
} else {
/*
* Apply process to process speculation vulnerability
@@ -918,21 +923,21 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
}
reload_tlb:
new_lam = mm_lam_cr3_mask(next);
- if (need_flush) {
- VM_WARN_ON_ONCE(is_global_asid(new_asid));
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
+ if (ns.need_flush) {
+ VM_WARN_ON_ONCE(is_global_asid(ns.asid));
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -941,7 +946,7 @@ reload_tlb:
barrier();
this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid);
cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next));
if (next != prev) {
© 2016 - 2025 Red Hat, Inc.