[tip: sched/core] sched/cache: Annotate lockless accesses to mm->sc_stat.cpu

Posted by tip-bot2 for Chen Yu 3 weeks, 4 days ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     91d07324c9305c0e4afff0cc859cac96594daa88
Gitweb:        https://git.kernel.org/tip/91d07324c9305c0e4afff0cc859cac96594daa88
Author:        Chen Yu <yu.c.chen@intel.com>
AuthorDate:    Wed, 13 May 2026 13:39:20 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 18 May 2026 21:33:16 +02:00

sched/cache: Annotate lockless accesses to mm->sc_stat.cpu

mm->sc_stat.cpu is written by task_cache_work() and could be read
locklessly by several functions on other CPUs.  Use READ_ONCE and
WRITE_ONCE on mm->sc_stat.cpu access and write to prevent inconsistent
values from compiler optimizations when there are multiple accesses.

For example in get_pref_llc(), if the writer updated the field between
two compiler-generated loads, the validation (e.g., cpu != -1) and
subsequent use (e.g., llc_id(cpu)) could operate on different values,
allowing a negative CPU ID to be used as an index.

Leave plain write in mm_init_sched(), where the mm is not
yet visible to other CPUs.

This bug was reported by sashiko.

Fixes: 47d8696b95f7 ("sched/cache: Assign preferred LLC ID to processes")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/63ea494f12efcf265d7134400a06cd75d7f2c310.1778703694.git.tim.c.chen@linux.intel.com
---
 kernel/sched/fair.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 663968b..087445e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1598,13 +1598,14 @@ static unsigned long fraction_mm_sched(struct rq *rq,
 
 static int get_pref_llc(struct task_struct *p, struct mm_struct *mm)
 {
-	int mm_sched_llc = -1;
+	int mm_sched_llc = -1, mm_sched_cpu;
 
 	if (!mm)
 		return -1;
 
-	if (mm->sc_stat.cpu != -1) {
-		mm_sched_llc = llc_id(mm->sc_stat.cpu);
+	mm_sched_cpu = READ_ONCE(mm->sc_stat.cpu);
+	if (mm_sched_cpu != -1) {
+		mm_sched_llc = llc_id(mm_sched_cpu);
 
 #ifdef CONFIG_NUMA_BALANCING
 		/*
@@ -1619,7 +1620,7 @@ static int get_pref_llc(struct task_struct *p, struct mm_struct *mm)
 		 */
 		if (static_branch_likely(&sched_numa_balancing) &&
 		    p->numa_preferred_nid >= 0 &&
-		    cpu_to_node(mm->sc_stat.cpu) != p->numa_preferred_nid)
+		    cpu_to_node(mm_sched_cpu) != p->numa_preferred_nid)
 			mm_sched_llc = -1;
 #endif
 	}
@@ -1665,8 +1666,8 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
 	if ((long)(epoch - READ_ONCE(mm->sc_stat.epoch)) > llc_epoch_affinity_timeout ||
 	    invalid_llc_nr(mm, p, cpu_of(rq)) ||
 	    exceed_llc_capacity(mm, cpu_of(rq))) {
-		if (mm->sc_stat.cpu != -1)
-			mm->sc_stat.cpu = -1;
+		if (READ_ONCE(mm->sc_stat.cpu) != -1)
+			WRITE_ONCE(mm->sc_stat.cpu, -1);
 	}
 
 	mm_sched_llc = get_pref_llc(p, mm);
@@ -1714,7 +1715,7 @@ static void get_scan_cpumasks(cpumask_var_t cpus, struct task_struct *p)
 	if (!static_branch_likely(&sched_numa_balancing))
 		goto out;
 
-	cpu = p->mm->sc_stat.cpu;
+	cpu = READ_ONCE(p->mm->sc_stat.cpu);
 	if (cpu != -1)
 		nid = cpu_to_node(cpu);
 	curr_cpu = task_cpu(p);
@@ -1799,8 +1800,8 @@ static void task_cache_work(struct callback_head *work)
 	curr_cpu = task_cpu(p);
 	if (invalid_llc_nr(mm, p, curr_cpu) ||
 	    exceed_llc_capacity(mm, curr_cpu)) {
-		if (mm->sc_stat.cpu != -1)
-			mm->sc_stat.cpu = -1;
+		if (READ_ONCE(mm->sc_stat.cpu) != -1)
+			WRITE_ONCE(mm->sc_stat.cpu, -1);
 
 		return;
 	}
@@ -1857,7 +1858,7 @@ static void task_cache_work(struct callback_head *work)
 				m_a_cpu = m_cpu;
 			}
 
-			if (llc_id(cpu) == llc_id(mm->sc_stat.cpu))
+			if (llc_id(cpu) == llc_id(READ_ONCE(mm->sc_stat.cpu)))
 				curr_m_a_occ = a_occ;
 
 			cpumask_andnot(cpus, cpus, sched_domain_span(sd));
@@ -1875,7 +1876,7 @@ static void task_cache_work(struct callback_head *work)
 		 * 3. 2X is chosen based on test results, as it delivers
 		 *    the optimal performance gain so far.
 		 */
-		mm->sc_stat.cpu = m_a_cpu;
+		WRITE_ONCE(mm->sc_stat.cpu, m_a_cpu);
 	}
 
 	update_avg_scale(&mm->sc_stat.nr_running_avg, nr_running);
@@ -10441,15 +10442,15 @@ static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
 	if (!mm)
 		return mig_unrestricted;
 
-	cpu = mm->sc_stat.cpu;
+	cpu = READ_ONCE(mm->sc_stat.cpu);
 	if (cpu < 0 || cpus_share_cache(src_cpu, dst_cpu))
 		return mig_unrestricted;
 
 	/* skip cache aware load balance for too many threads */
 	if (invalid_llc_nr(mm, p, dst_cpu) ||
 	    exceed_llc_capacity(mm, dst_cpu)) {
-		if (mm->sc_stat.cpu != -1)
-			mm->sc_stat.cpu = -1;
+		if (READ_ONCE(mm->sc_stat.cpu) != -1)
+			WRITE_ONCE(mm->sc_stat.cpu, -1);
 		return mig_unrestricted;
 	}