Aside from generating slightly better code by not having to use %fs-prefixed
ops, the real purpose is to clarify the code by switching some of these
accesses to smp_store_release() later on.
Notably, this_cpu_{read,write}() imply {READ,WRITE}_ONCE().
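
As an illustration only (example_loaded_mm() below is a made-up helper,
not part of the patch), the conversion amounts to materialising the
per-CPU pointer once and then going through it with plain
READ_ONCE()/WRITE_ONCE(), which does not weaken any existing access:

#include <linux/percpu.h>
#include <linux/bug.h>
#include <asm/tlbflush.h>	/* struct tlb_state, cpu_tlbstate */

/* Hypothetical helper; caller must run with IRQs (or preemption) off. */
static struct mm_struct *example_loaded_mm(void)
{
	/* Old style: every access is a segment-prefixed percpu op. */
	struct mm_struct *a = this_cpu_read(cpu_tlbstate.loaded_mm);

	/*
	 * New style: take the per-CPU pointer once, then use plain
	 * READ_ONCE()/WRITE_ONCE() loads and stores through it.  Since
	 * this_cpu_read()/this_cpu_write() already imply ONCE semantics,
	 * nothing is relaxed; the accesses merely become easy to upgrade
	 * to smp_store_release()/smp_load_acquire() later.
	 */
	struct tlb_state *this_tlbstate = this_cpu_ptr(&cpu_tlbstate);
	struct mm_struct *b = READ_ONCE(this_tlbstate->loaded_mm);

	WARN_ON_ONCE(a != b);
	return b;
}
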
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/mm/tlb.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -51,7 +51,7 @@
/*
* Bits to mangle the TIF_SPEC_* state into the mm pointer which is
- * stored in cpu_tlb_state.last_user_mm_spec.
+ * stored in cpu_tlbstate.last_user_mm_spec.
*/
#define LAST_USER_MM_IBPB 0x1UL
#define LAST_USER_MM_L1D_FLUSH 0x2UL
@@ -782,8 +782,9 @@ static inline void cr4_update_pce_mm(str
void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
struct task_struct *tsk)
{
- struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm);
- u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ struct tlb_state *this_tlbstate = this_cpu_ptr(&cpu_tlbstate);
+ struct mm_struct *prev = READ_ONCE(this_tlbstate->loaded_mm);
+ u16 prev_asid = READ_ONCE(this_tlbstate->loaded_mm_asid);
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
unsigned long new_lam;
@@ -840,7 +841,7 @@ void switch_mm_irqs_off(struct mm_struct
if (prev == next) {
/* Not actually switching mm's */
VM_WARN_ON(is_dyn_asid(prev_asid) &&
- this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+ READ_ONCE(this_tlbstate->ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
/*
@@ -888,7 +889,7 @@ void switch_mm_irqs_off(struct mm_struct
*/
smp_mb();
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
+ if (READ_ONCE(this_tlbstate->ctxs[prev_asid].tlb_gen) ==
next_tlb_gen)
return;
@@ -910,7 +911,7 @@ void switch_mm_irqs_off(struct mm_struct
* and others are sensitive to the window where mm_cpumask(),
* CR3 and cpu_tlbstate.loaded_mm are not all in sync.
*/
- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ WRITE_ONCE(this_tlbstate->loaded_mm, LOADED_MM_SWITCHING);
barrier();
/* Start receiving IPIs and then read tlb_gen (and LAM below) */
@@ -925,8 +926,8 @@ void switch_mm_irqs_off(struct mm_struct
new_lam = mm_lam_cr3_mask(next);
if (ns.need_flush) {
VM_WARN_ON_ONCE(is_global_asid(ns.asid));
- this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen);
+ WRITE_ONCE(this_tlbstate->ctxs[ns.asid].ctx_id, next->context.ctx_id);
+ WRITE_ONCE(this_tlbstate->ctxs[ns.asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, ns.asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
@@ -940,8 +941,8 @@ void switch_mm_irqs_off(struct mm_struct
/* Make sure we write CR3 before loaded_mm. */
barrier();
- this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid);
+ WRITE_ONCE(this_tlbstate->loaded_mm, next);
+ WRITE_ONCE(this_tlbstate->loaded_mm_asid, ns.asid);
cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next));
if (next != prev) {
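
For context only, and not part of this patch: the changelog's hint about
switching to smp_store_release() later could take roughly the shape of
the hypothetical sketch below (example_publish_loaded_mm() and
example_remote_loaded_mm() are invented names, the choice of fields and
ordering is purely illustrative, and the usual tlb.c includes are
assumed): a release store publishing loaded_mm, paired with an acquire
load on a remote reader.

/* Hypothetical writer side, running with IRQs off on the switching CPU. */
static void example_publish_loaded_mm(struct tlb_state *this_tlbstate,
				      struct mm_struct *next_mm, u16 asid)
{
	/* Store the ASID first... */
	WRITE_ONCE(this_tlbstate->loaded_mm_asid, asid);
	/* ...then publish the mm with release semantics. */
	smp_store_release(&this_tlbstate->loaded_mm, next_mm);
}

/* Hypothetical remote reader; pairs with the release above. */
static struct mm_struct *example_remote_loaded_mm(int cpu, u16 *asid)
{
	struct tlb_state *tlbstate = per_cpu_ptr(&cpu_tlbstate, cpu);
	/* If we observe the new mm via the acquire... */
	struct mm_struct *mm = smp_load_acquire(&tlbstate->loaded_mm);

	/* ...we are guaranteed to also observe the matching ASID. */
	*asid = READ_ONCE(tlbstate->loaded_mm_asid);
	return mm;
}
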
On 5/20/25 03:55, Peter Zijlstra wrote:
> Aside from generating slightly better code by not having to use %fs-prefixed
> ops, the real purpose is to clarify the code by switching some of these
> accesses to smp_store_release() later on.
>
> Notably, this_cpu_{read,write}() imply {READ,WRITE}_ONCE().
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>