[tip: sched/core] sched/cache: Fix race condition during sched domain rebuild

tip-bot2 for Chen Yu posted 1 patch 4 days, 14 hours ago
kernel/sched/debug.c    |  4 +++-
kernel/sched/sched.h    |  2 +-
kernel/sched/topology.c | 43 ++++++++++++++--------------------------
3 files changed, 20 insertions(+), 29 deletions(-)
[tip: sched/core] sched/cache: Fix race condition during sched domain rebuild
Posted by tip-bot2 for Chen Yu 4 days, 14 hours ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     9f7c745850b4b1b7e4706ae81f04c43f204a6a8d
Gitweb:        https://git.kernel.org/tip/9f7c745850b4b1b7e4706ae81f04c43f204a6a8d
Author:        Chen Yu <yu.c.chen@intel.com>
AuthorDate:    Wed, 13 May 2026 13:39:23 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 18 May 2026 21:33:17 +02:00

sched/cache: Fix race condition during sched domain rebuild

sched_cache_active_set_unlocked() calls sched_cache_active_set(false),
which checks for hardware support without holding any locks:
static void sched_cache_active_set(bool locked)
{
        /* hardware does not support */
        if (!static_branch_likely(&sched_cache_present)) {
                _sched_cache_active_set(false, locked);
                return;
        }
    ...
If build_sched_domains() runs concurrently during CPU hotplug,
it can disable sched_cache_present while holding sched_domains_mutex
and the CPU hotplug lock. If a thread writing to debugfs evaluates
sched_cache_present as true right before that, and then blocks
or gets preempted, it might go on to enable sched_cache_active
after hardware support has been marked as absent. Close this
window by taking cpus_read_lock() and sched_domains_mutex_lock()
when the user changes sched_cache_active via debugfs.

This bug was reported by sashiko.

Fixes: 067a31358143 ("sched/cache: Allow the user space to turn on and off cache aware scheduling")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/9afddf439687f04bb56b46625bd9f153eb8abad5.1778703694.git.tim.c.chen@linux.intel.com
---
 kernel/sched/debug.c    |  4 +++-
 kernel/sched/sched.h    |  2 +-
 kernel/sched/topology.c | 43 ++++++++++++++--------------------------
 3 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index fe56953..ed3a0d6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -224,7 +224,9 @@ sched_cache_enable_write(struct file *filp, const char __user *ubuf,
 
 	sysctl_sched_cache_user = val;
 
-	sched_cache_active_set_unlocked();
+	sched_cache_active_set();
+
+	*ppos += cnt;
 
 	return cnt;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2740939..45a3b77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -4083,7 +4083,7 @@ static inline bool sched_cache_enabled(void)
 	return static_branch_unlikely(&sched_cache_active);
 }
 
-extern void sched_cache_active_set_unlocked(void);
+extern void sched_cache_active_set(void);
 
 #endif
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 7248a72..c257134 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -917,30 +917,20 @@ err:
 	return false;
 }
 
-static void _sched_cache_active_set(bool enable, bool locked)
-{
-	if (enable) {
-		if (locked)
-			static_branch_enable_cpuslocked(&sched_cache_active);
-		else
-			static_branch_enable(&sched_cache_active);
-	} else {
-		if (locked)
-			static_branch_disable_cpuslocked(&sched_cache_active);
-		else
-			static_branch_disable(&sched_cache_active);
-	}
-}
-
 /*
  * Enable/disable cache aware scheduling according to
  * user input and the presence of hardware support.
  */
-static void sched_cache_active_set(bool locked)
+static void _sched_cache_active_set(void)
 {
+	lockdep_assert_cpus_held();
+	lockdep_assert_held(&sched_domains_mutex);
+
 	/* hardware does not support */
 	if (!static_branch_likely(&sched_cache_present)) {
-		_sched_cache_active_set(false, locked);
+		static_branch_disable_cpuslocked(&sched_cache_active);
+		if (sched_debug())
+			pr_info("%s: cache aware scheduling not supported on this platform\n", __func__);
 		return;
 	}
 
@@ -951,24 +941,23 @@ static void sched_cache_active_set(bool locked)
 	 * for now.
 	 */
 	if (sysctl_sched_cache_user) {
-		_sched_cache_active_set(true, locked);
+		static_branch_enable_cpuslocked(&sched_cache_active);
 		if (sched_debug())
 			pr_info("%s: enabling cache aware scheduling\n", __func__);
 	} else {
-		_sched_cache_active_set(false, locked);
+		static_branch_disable_cpuslocked(&sched_cache_active);
 		if (sched_debug())
 			pr_info("%s: disabling cache aware scheduling\n", __func__);
 	}
 }
 
-static void sched_cache_active_set_locked(void)
+void sched_cache_active_set(void)
 {
-	return sched_cache_active_set(true);
-}
-
-void sched_cache_active_set_unlocked(void)
-{
-	return sched_cache_active_set(false);
+	cpus_read_lock();
+	sched_domains_mutex_lock();
+	_sched_cache_active_set();
+	sched_domains_mutex_unlock();
+	cpus_read_unlock();
 }
 
 /*
@@ -3082,7 +3071,7 @@ error:
 	else
 		static_branch_disable_cpuslocked(&sched_cache_present);
 
-	sched_cache_active_set_locked();
+	_sched_cache_active_set();
 #endif
 	__free_domain_allocs(&d, alloc_state, cpu_map);