[tip: sched/core] sched/cache: Calculate the percpu sd task LLC preference

tip-bot2 for Tim Chen posted 1 patch 4 days, 14 hours ago
kernel/sched/fair.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
[tip: sched/core] sched/cache: Calculate the percpu sd task LLC preference
Posted by tip-bot2 for Tim Chen 4 days, 14 hours ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     82c960aee304bf286552046b66d5b0b3933b2418
Gitweb:        https://git.kernel.org/tip/82c960aee304bf286552046b66d5b0b3933b2418
Author:        Tim Chen <tim.c.chen@linux.intel.com>
AuthorDate:    Wed, 01 Apr 2026 14:52:21 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 09 Apr 2026 15:49:49 +02:00

sched/cache: Calculate the percpu sd task LLC preference

For each runqueue, count how many of its tasks prefer each LLC.
This statistic is updated during task enqueue and dequeue
operations, and is used by cache-aware load balancing.

Co-developed-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/d15a64436d3acd19c5c53344c5e9d3d0b79b3233.1775065312.git.tim.c.chen@linux.intel.com
---
 kernel/sched/fair.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e66da7a..7d52cf0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1386,6 +1386,7 @@ static int llc_id(int cpu)
 
 static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
 {
+	struct sched_domain *sd;
 	int pref_llc;
 
 	pref_llc = p->preferred_llc;
@@ -1394,10 +1395,15 @@ static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
 
 	rq->nr_llc_running++;
 	rq->nr_pref_llc_running += (pref_llc == task_llc(p));
+
+	sd = rcu_dereference_all(rq->sd);
+	if (sd && (unsigned int)pref_llc < sd->llc_max)
+		sd->llc_counts[pref_llc]++;
 }
 
 static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
 {
+	struct sched_domain *sd;
 	int pref_llc;
 
 	pref_llc = p->preferred_llc;
@@ -1406,6 +1412,24 @@ static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
 
 	rq->nr_llc_running--;
 	rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
+
+	sd = rcu_dereference_all(rq->sd);
+	if (sd && (unsigned int)pref_llc < sd->llc_max) {
+		/*
+		 * There is a race condition between dequeue
+		 * and CPU hotplug. After a task has been enqueued
+		 * on CPUx, a CPU hotplug event occurs, and all online
+		 * CPUs (including CPUx) rebuild their sched_domains
+		 * and reset statistics to zero (including sd->llc_counts).
+		 * This can cause temporary undercount and we have to
+		 * check for such underflow in sd->llc_counts.
+		 *
+		 * This undercount is temporary and accurate accounting
+		 * will resume once the rq has a chance to be idle.
+		 */
+		if (sd->llc_counts[pref_llc])
+			sd->llc_counts[pref_llc]--;
+	}
 }
 
 void mm_init_sched(struct mm_struct *mm,