sched/cache: Skip cache-aware scheduling for single-threaded processes

[tip: sched/core] sched/cache: Skip cache-aware scheduling for single-threaded processes
Posted by tip-bot2 for Chen Yu 3 weeks, 4 days ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     7b34bb1ca324451c84c0a69136ce92e7928cf72b
Gitweb:        https://git.kernel.org/tip/7b34bb1ca324451c84c0a69136ce92e7928cf72b
Author:        Chen Yu <yu.c.chen@intel.com>
AuthorDate:    Wed, 13 May 2026 13:39:14 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 18 May 2026 21:33:14 +02:00

sched/cache: Skip cache-aware scheduling for single-threaded processes

The current wakeup path already tends to place a single-threaded
process on the LLC where it previously ran and where its data is
still cache-hot. There is no need to enable cache-aware scheduling
for single-threaded processes, for the following reasons:

1. Cache-aware scheduling primarily benefits multi-threaded
   processes where threads share data. Single-threaded processes
   typically have no inter-thread data sharing and thus gain little.

2. Enabling it incurs the additional overhead of tracking the
   thread's residency in the LLCs.

3. Bypassing single-threaded processes avoids excessive
   concentration of such tasks on a single LLC.

Nevertheless, this check could be bypassed when users explicitly
hint that separate single-threaded processes share memory with
one another, e.g., via prctl() or other interfaces to be added
in the future.

Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Tingyin Duan <tingyin.duan@gmail.com>
Link: https://patch.msgid.link/8a59a13aa58fdb48e410ecb2aabd97fe3ea5d256.1778703694.git.tim.c.chen@linux.intel.com
---
 kernel/sched/fair.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 808f614..df21366 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1384,8 +1384,12 @@ static int llc_id(int cpu)
 	return per_cpu(sd_llc_id, cpu);
 }
 
-static bool invalid_llc_nr(struct mm_struct *mm, int cpu)
+static bool invalid_llc_nr(struct mm_struct *mm, struct task_struct *p,
+			   int cpu)
 {
+	if (get_nr_threads(p) <= 1)
+		return true;
+
 	return !fits_capacity((mm->sc_stat.nr_running_avg * cpu_smt_num_threads),
 			per_cpu(sd_llc_size, cpu));
 }
@@ -1581,7 +1585,7 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
 	 * its preferred state.
 	 */
 	if (epoch - READ_ONCE(mm->sc_stat.epoch) > EPOCH_LLC_AFFINITY_TIMEOUT ||
-	    invalid_llc_nr(mm, cpu_of(rq))) {
+	    invalid_llc_nr(mm, p, cpu_of(rq))) {
 		if (mm->sc_stat.cpu != -1)
 			mm->sc_stat.cpu = -1;
 	}
@@ -1687,9 +1691,9 @@ static inline void update_avg_scale(u64 *avg, u64 sample)
 
 static void task_cache_work(struct callback_head *work)
 {
+	int cpu, m_a_cpu = -1, nr_running = 0, curr_cpu;
 	unsigned long next_scan, now = jiffies;
 	struct task_struct *p = current, *cur;
-	int cpu, m_a_cpu = -1, nr_running = 0;
 	unsigned long curr_m_a_occ = 0;
 	struct mm_struct *mm = p->mm;
 	unsigned long m_a_occ = 0;
@@ -1711,6 +1715,14 @@ static void task_cache_work(struct callback_head *work)
 			 now + EPOCH_PERIOD))
 		return;
 
+	curr_cpu = task_cpu(p);
+	if (invalid_llc_nr(mm, p, curr_cpu)) {
+		if (mm->sc_stat.cpu != -1)
+			mm->sc_stat.cpu = -1;
+
+		return;
+	}
+
 	if (!zalloc_cpumask_var(&cpus, GFP_KERNEL))
 		return;
 
@@ -10326,7 +10338,7 @@ static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
 		return mig_unrestricted;
 
 	/* skip cache aware load balance for too many threads */
-	if (invalid_llc_nr(mm, dst_cpu)) {
+	if (invalid_llc_nr(mm, p, dst_cpu)) {
 		if (mm->sc_stat.cpu != -1)
 			mm->sc_stat.cpu = -1;
 		return mig_unrestricted;