[tip: sched/core] sched/cache: Respect LLC preference in task migration and detach

tip-bot2 for Tim Chen posted 1 patch 4 days, 14 hours ago
kernel/sched/fair.c  | 83 ++++++++++++++++++++++++++++++++++++++++---
kernel/sched/sched.h | 13 +++++++-
2 files changed, 91 insertions(+), 5 deletions(-)
[tip: sched/core] sched/cache: Respect LLC preference in task migration and detach
Posted by tip-bot2 for Tim Chen 4 days, 14 hours ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     5b1d5e6db20a6c64ffb95d04578db8c4b0228eea
Gitweb:        https://git.kernel.org/tip/5b1d5e6db20a6c64ffb95d04578db8c4b0228eea
Author:        Tim Chen <tim.c.chen@linux.intel.com>
AuthorDate:    Wed, 01 Apr 2026 14:52:27 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 09 Apr 2026 15:49:51 +02:00

sched/cache: Respect LLC preference in task migration and detach

During load balancing, make can_migrate_task()
consider a task's LLC preference.
Prevent a task from being moved out of its preferred LLC.

During the regular load balancing, if
the task cannot be migrated due to LLC locality, the
nr_balance_failed also should not be increased.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Co-developed-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/53da65f3d59de31e1a1dc59a4093d8dd9d4dc206.1775065312.git.tim.c.chen@linux.intel.com
---
 kernel/sched/fair.c  | 83 ++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h | 13 +++++++-
 2 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bfb6c0c..5f22e5a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9955,6 +9955,7 @@ enum migration_type {
 #define LBF_DST_PINNED  0x04
 #define LBF_SOME_PINNED	0x08
 #define LBF_ACTIVE_LB	0x10
+#define LBF_LLC_PINNED	0x20
 
 struct lb_env {
 	struct sched_domain	*sd;
@@ -10267,8 +10268,8 @@ static enum llc_mig can_migrate_llc(int src_cpu, int dst_cpu,
  * Check if task p can migrate from source LLC to
  * destination LLC in terms of cache aware load balance.
  */
-static __maybe_unused enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
-							struct task_struct *p)
+static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
+					 struct task_struct *p)
 {
 	struct mm_struct *mm;
 	bool to_pref;
@@ -10335,6 +10336,46 @@ alb_break_llc(struct lb_env *env)
 
 	return false;
 }
+
+/*
+ * Check if migrating task p from env->src_cpu to
+ * env->dst_cpu breaks LLC localiy.
+ */
+static bool migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
+{
+	if (!sched_cache_enabled())
+		return false;
+
+	if (task_has_sched_core(p))
+		return false;
+	/*
+	 * Skip over tasks that would degrade LLC locality;
+	 * only when nr_balanced_failed is sufficiently high do we
+	 * ignore this constraint.
+	 *
+	 * Threshold of cache_nice_tries is set to 1 higher
+	 * than nr_balance_failed to avoid excessive task
+	 * migration at the same time.
+	 */
+	if (env->sd->nr_balance_failed >= env->sd->cache_nice_tries + 1)
+		return false;
+
+	/*
+	 * We know the env->src_cpu has some tasks prefer to
+	 * run on env->dst_cpu, skip the tasks do not prefer
+	 * env->dst_cpu, and find the one that prefers.
+	 */
+	if (env->migration_type == migrate_llc_task &&
+	    READ_ONCE(p->preferred_llc) != llc_id(env->dst_cpu))
+		return true;
+
+	if (can_migrate_llc_task(env->src_cpu,
+				 env->dst_cpu, p) != mig_forbid)
+		return false;
+
+	return true;
+}
+
 #else
 static inline bool get_llc_stats(int cpu, unsigned long *util,
 				 unsigned long *cap)
@@ -10347,6 +10388,12 @@ alb_break_llc(struct lb_env *env)
 {
 	return false;
 }
+
+static inline bool
+migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
+{
+	return false;
+}
 #endif
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
@@ -10444,10 +10491,29 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 		return 1;
 
 	degrades = migrate_degrades_locality(p, env);
-	if (!degrades)
+	if (!degrades) {
+		/*
+		 * If the NUMA locality is not broken,
+		 * further check if migration would hurt
+		 * LLC locality.
+		 */
+		if (migrate_degrades_llc(p, env)) {
+			/*
+			 * If regular load balancing fails to pull a task
+			 * due to LLC locality, this is expected behavior
+			 * and we set LBF_LLC_PINNED so we don't increase
+			 * nr_balance_failed unecessarily.
+			 */
+			if (env->migration_type != migrate_llc_task)
+				env->flags |= LBF_LLC_PINNED;
+
+			return 0;
+		}
+
 		hot = task_hot(p, env);
-	else
+	} else {
 		hot = degrades > 0;
+	}
 
 	if (!hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (hot)
@@ -13067,9 +13133,16 @@ more_balance:
 		 *
 		 * Similarly for migration_misfit which is not related to
 		 * load/util migration, don't pollute nr_balance_failed.
+		 *
+		 * The same for cache aware scheduling's allowance for
+		 * load imbalance. If regular load balance does not
+		 * migrate task due to LLC locality, it is a expected
+		 * behavior and don't pollute nr_balance_failed.
+		 * See can_migrate_task().
 		 */
 		if (idle != CPU_NEWLY_IDLE &&
-		    env.migration_type != migrate_misfit)
+		    env.migration_type != migrate_misfit &&
+		    !(env.flags & LBF_LLC_PINNED))
 			sd->nr_balance_failed++;
 
 		if (need_active_balance(&env)) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3c9e92b..a56619b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1547,6 +1547,14 @@ extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
 extern void sched_core_get(void);
 extern void sched_core_put(void);
 
+static inline bool task_has_sched_core(struct task_struct *p)
+{
+	if (sched_core_disabled())
+		return false;
+
+	return !!p->core_cookie;
+}
+
 #else /* !CONFIG_SCHED_CORE: */
 
 static inline bool sched_core_enabled(struct rq *rq)
@@ -1587,6 +1595,11 @@ static inline bool sched_group_cookie_match(struct rq *rq,
 	return true;
 }
 
+static inline bool task_has_sched_core(struct task_struct *p)
+{
+	return false;
+}
+
 #endif /* !CONFIG_SCHED_CORE */
 
 #ifdef CONFIG_RT_GROUP_SCHED